# This comprehensive training manual provides essential tips, tricks, shortcuts, and best practices for R programming across various domains. Whether you're a beginner or intermediate user, this guide will help you write better, faster, and more efficient R code.
# ESSENTIAL KEYBOARD SHORTCUTS (Works on Windows/Mac)
#
# Code Execution:
# Ctrl/Cmd + Enter - Run current line or selection
# Ctrl/Cmd + Shift + Enter - Run entire script
# Ctrl/Cmd + Shift + S - Source entire script
#
# Code Editing:
# Ctrl/Cmd + Shift + C - Comment/uncomment lines
# Ctrl/Cmd + I - Re-indent code
# Ctrl/Cmd + Shift + A - Reformat code
# Alt + - - Insert assignment operator <-
# Ctrl/Cmd + Shift + M - Insert pipe operator %>%
#
# Navigation:
# Ctrl/Cmd + 1 - Move cursor to source editor
# Ctrl/Cmd + 2 - Move cursor to console
# Ctrl/Cmd + Shift + F - Find in files
# Ctrl/Cmd + F - Find/replace in current file
#
# Code Completion:
# Tab - Auto-complete
# Ctrl/Cmd + Space - Show function arguments
# F1 - Open help for function under cursor
#
# Session Management:
# Ctrl/Cmd + Shift + F10 - Restart R session
# Ctrl/Cmd + L - Clear console
# 1. Set working directory (DO THIS FIRST!)
# setwd() changes the working directory for the rest of the session and errors
# if the directory does not exist. The two calls below are alternatives for
# different operating systems - run only the one that matches your machine.
setwd("~/R_Projects/MyProject") # Linux/Mac
setwd("C:/Users/YourName/R_Projects/MyProject") # Windows
# Better: Use RStudio Projects (.Rproj files) - they auto-set working directory!
# 2. Check your current directory
getwd()
# 3. List files in directory
# With no arguments both calls list the contents of the current working directory.
list.files()
dir() # Same as list.files()
# 4. Create project structure
# Create the standard project folders relative to the current working
# directory. The dir.exists() guard makes the script safe to re-run:
# dir.create() warns when asked to create a folder that already exists, while
# a genuine failure (e.g. no write permission) still produces its warning.
for (project_dir in c("data", "scripts", "output", "figures")) {
  if (!dir.exists(project_dir)) {
    dir.create(project_dir)
  }
}
# 5. Set global options
# options() changes these settings for the current R session.
options(
scipen = 999, # Disable scientific notation
digits = 3, # Number of digits to display
stringsAsFactors = FALSE, # Don't auto-convert strings to factors (already the default since R 4.0)
repos = "https://cran.rstudio.com/" # Default CRAN mirror
)
# DO: Use meaningful variable names
# Both vectors hold the same values; only the name tells the reader what they mean.
customer_age <- c(25, 30, 35, 40) # GOOD
ca <- c(25, 30, 35, 40) # BAD
# DO: Use <- for assignment (not =)
x <- 5 # GOOD (R convention)
x = 5 # Works, but not preferred
# DO: Add comments to explain your code
# Calculate average customer age
mean_age <- mean(customer_age)
# DO: Use consistent naming convention
# Choose one and stick to it:
snake_case_variable <- "recommended" # snake_case (recommended)
camelCaseVariable <- "also_good" # camelCase
# PascalCaseVariable <- "for_functions" # PascalCase
# DO: Vectorize operations (avoid loops when possible)
# GOOD - Vectorized
# ^ is applied to every element of `numbers` in a single call.
numbers <- 1:1000
squares <- numbers^2
# BAD - Loop (slower)
# Produces the same 1000 squares, one element per iteration.
squares <- numeric(1000)
for(i in 1:1000) {
squares[i] <- i^2
}
# DO: Use built-in functions
sum(1:100) # GOOD
## [1] 5050
# Same total as sum(1:100), accumulated one addition at a time.
total <- 0; for(i in 1:100) total <- total + i # BAD
# DO: Check data structure regularly
str(mtcars) # Structure## 'data.frame': 32 obs. of 11 variables:
## $ mpg : num 21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
## $ cyl : num 6 6 4 6 8 6 8 4 4 6 ...
## $ disp: num 160 160 108 258 360 ...
## $ hp : num 110 110 93 110 175 105 245 62 95 123 ...
## $ drat: num 3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
## $ wt : num 2.62 2.88 2.32 3.21 3.44 ...
## $ qsec: num 16.5 17 18.6 19.4 17 ...
## $ vs : num 0 0 1 1 0 1 0 1 1 1 ...
## $ am : num 1 1 1 0 0 0 0 0 0 0 ...
## $ gear: num 4 4 4 3 3 3 3 4 4 4 ...
## $ carb: num 4 4 1 1 2 1 4 2 2 4 ...
## [1] "data.frame"
## [1] 32 11
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
## Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
## Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
# DON'T: Use attach() - it creates confusion
# attach() puts the columns of mtcars on the search path, so a bare `mpg` can
# resolve to the column or to any other object of that name.
attach(mtcars) # BAD - creates ambiguity
mpg # Which mpg?
detach(mtcars)
# Instead, use:
mtcars$mpg # GOOD - explicit
with(mtcars, mean(mpg)) # GOOD - clear scope
# DON'T: Grow objects in loops
# c(result, i^2) builds a new, longer vector on every iteration.
result <- c()
for(i in 1:1000) {
result <- c(result, i^2) # BAD - very slow!
}
# Instead, pre-allocate:
# numeric(1000) allocates the full-length vector once; the loop only fills it in.
result <- numeric(1000)
for(i in 1:1000) {
result[i] <- i^2 # GOOD - much faster
}
# DON'T: Use T and F for TRUE and FALSE
# T and F are ordinary variables that can be reassigned; TRUE and FALSE are reserved words.
x <- T # BAD - T and F can be overwritten!
x <- F # BAD
x <- TRUE # GOOD
x <- FALSE # GOOD
# DON'T: Forget to set.seed() for reproducibility
sample(1:10, 5) # BAD - different results each time
# set.seed() fixes the random number generator state, so the draw below is the
# same on every run.
set.seed(123)
sample(1:10, 5) # GOOD - reproducible
# DON'T: Use == for comparing floating point numbers
# 0.1, 0.2 and 0.3 have no exact binary floating point representation, so the
# computed sum differs from 0.3 in its last bits and an exact comparison fails.
0.1 + 0.2 == 0.3 # Returns FALSE! (floating point precision)
# Use all.equal() or near()
# all.equal() returns TRUE or a character description of the difference - never
# FALSE - so wrap it in isTRUE() to get a logical that is safe inside if ().
isTRUE(all.equal(0.1 + 0.2, 0.3)) # TRUE
dplyr::near(0.1 + 0.2, 0.3) # TRUE# GOOD: Check if package is installed before installing
# requireNamespace() only reports whether the package can be loaded. Unlike
# require(), it does not attach the package as a side effect, so it is the
# right tool for an "is it installed?" check.
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}
# GOOD: Install multiple packages at once
packages <- c("dplyr", "ggplot2", "tidyr", "readr")
install.packages(packages)
# GOOD: Use pacman for smart package loading
if (!requireNamespace("pacman", quietly = TRUE)) install.packages("pacman")
pacman::p_load(dplyr, ggplot2, tidyr, readr) # Installs if needed, then loads
# GOOD: Load packages at the start of your script
# library() stops with an error when a package is missing, so a broken setup
# is noticed immediately.
library(dplyr)
library(ggplot2)
library(tidyr)
# BAD: Loading packages in the middle of your script
# ... makes it hard to track dependencies
# GOOD: Use package::function() for rare function calls
# Avoids attaching the entire package to the search path
conflicted::conflict_prefer("filter", "dplyr")
# Check installed packages
installed.packages()[, c("Package", "Version")]
# Update all packages
update.packages(ask = FALSE)
# Remove package
remove.packages("package_name")
# Name conflicts: MASS is attached after dplyr, so a bare select() now resolves
# to MASS::select() instead of dplyr::select().
library(dplyr)
library(MASS) # MASS::select() conflicts with dplyr::select()
# SOLUTION 1: Use package prefix
# MASS::select(iris, Species)
# dplyr::select(iris, Species)
# SOLUTION 2: Use conflicted package
library(conflicted)
# conflict_prefer("select", "dplyr")
# conflict_prefer("filter", "dplyr")
# Now select() will always use dplyr version
## [1] "numeric"
## [1] "integer"
## [1] "character"
## [1] "logical"
## [1] "factor"
## [1] "F" "M"
## [1] "Date"
## [1] "POSIXct" "POSIXt"
## [1] "double"
## [1] "numeric"
## [1] "numeric"
## num 42
# 1. VECTOR (1-dimensional, same type)
# c() combines its arguments into one atomic vector; all elements share a type.
vec_numeric <- c(1, 2, 3, 4, 5)
vec_character <- c("a", "b", "c")
vec_logical <- c(TRUE, FALSE, TRUE)
# Named vectors
ages <- c(John = 25, Jane = 30, Bob = 35)
# Elements of a named vector can be looked up by name.
ages["John"]
## John
## 25
# NOTE(review): the output below looks like a 3 x 4 matrix followed by three
# subsetting results, but the code that produced it (a "2. MATRIX" section?)
# is not present here - confirm against the original document.
## [,1] [,2] [,3] [,4]
## [1,] 1 4 7 10
## [2,] 2 5 8 11
## [3,] 3 6 9 12
## [1] 8
## [1] 2 5 8 11
## [1] 7 8 9
# 3. ARRAY (n-dimensional, same type)
# The 24 values are arranged along three dimensions of size 3, 4 and 2.
arr <- array(1:24, dim = c(3, 4, 2))
# 4. LIST (can contain different types)
# Each element has its own type: an integer vector, a string, a matrix and a
# nested list.
my_list <- list(
numbers = 1:5,
text = "hello",
matrix = matrix(1:4, 2, 2),
nested = list(a = 1, b = 2)
)
# Access list elements
my_list$numbers
## [1] 1 2 3 4 5
# NOTE(review): two more identical outputs follow without matching code -
# presumably alternative ways of accessing the same element; confirm.
## [1] 1 2 3 4 5
## [1] 1 2 3 4 5
# 5. DATA FRAME (2D, different types in columns)
# Every column is a vector of length 5; the column types differ.
df <- data.frame(
id = 1:5,
name = c("Alice", "Bob", "Charlie", "David", "Eve"),
age = c(25, 30, 35, 40, 45),
active = c(TRUE, TRUE, FALSE, TRUE, FALSE)
)
# str() prints one line per column with its type and first values.
str(df)
## 'data.frame': 5 obs. of 4 variables:
## $ id : int 1 2 3 4 5
## $ name : chr "Alice" "Bob" "Charlie" "David" ...
## $ age : num 25 30 35 40 45
## $ active: logi TRUE TRUE FALSE TRUE FALSE
# Coercion hierarchy: logical < integer < numeric < character
# Convert between types
as.numeric("42")
## [1] 42
# NOTE(review): the next two outputs have no matching code here - presumably
# as.character(42) and a conversion to logical; confirm.
## [1] "42"
## [1] TRUE
# Factor to numeric (TRICKY!)
f <- factor(c("10", "20", "30"))
# as.numeric() on a factor returns the internal integer level codes, not the
# numbers written in the labels.
as.numeric(f) # WRONG! Returns 1, 2, 3
## [1] 1 2 3
# NOTE(review): the output below is the intended result; the call that produced
# it is not shown here (the table below lists as.numeric(as.character(f))).
## [1] 10 20 30
# Quick conversion table
# The Code column stores the snippets as plain strings; they are not evaluated.
conversion_examples <- data.frame(
Operation = c("String to Number", "Number to String", "Factor to Numeric"),
Code = c("as.numeric('42')", "as.character(42)", "as.numeric(as.character(f))"),
stringsAsFactors = FALSE
)
print(conversion_examples)
## Operation Code
## 1 String to Number as.numeric('42')
## 2 Number to String as.character(42)
## 3 Factor to Numeric as.numeric(as.character(f))
# IF-ELSE
# if () tests a single condition; the branches are checked top to bottom and
# only the first one that matches runs.
x <- 10
if (x > 5) {
print("x is greater than 5")
# `else` has to follow the closing brace on the same line when the statement
# is typed at top level, otherwise R treats the if as already complete.
} else if (x == 5) {
print("x equals 5")
} else {
print("x is less than 5")
}
## [1] "x is greater than 5"
# Vectorized ifelse()
values <- c(1, 5, 10, 15, 20)
# The test is strict (>), so the value 10 itself is labelled "Low".
categories <- ifelse(values > 10, "High", "Low")
categories
## [1] "Low" "Low" "Low" "High" "High"
# case_when() from dplyr (more readable for multiple conditions)
library(dplyr)
values_df <- data.frame(value = values)
# Conditions are evaluated in order and the first match wins; the final
# TRUE ~ "High" is the catch-all for every remaining row.
values_df %>%
mutate(category = case_when(
value < 5 ~ "Very Low",
value < 10 ~ "Low",
value < 15 ~ "Medium",
TRUE ~ "High"
))
## value category
## 1 1 Very Low
## 2 5 Low
## 3 10 Medium
## 4 15 High
## 5 20 High
## [1] "Iteration: 1"
## [1] "Iteration: 2"
## [1] "Iteration: 3"
## [1] "Iteration: 4"
## [1] "Iteration: 5"
## [1] "Count: 1"
## [1] "Count: 2"
## [1] "Count: 3"
## [1] "Count: 4"
## [1] "Count: 5"
# REPEAT loop (use with break)
# repeat has no condition of its own: the break below is the only way out.
count <- 1
repeat {
print(paste("Count:", count))
count <- count + 1
# The check runs after the increment, so the body prints for count 1 to 5.
if (count > 5) break
}
## [1] "Count: 1"
## [1] "Count: 2"
## [1] "Count: 3"
## [1] "Count: 4"
## [1] "Count: 5"
# BETTER: Use apply family instead of loops
# apply(), lapply(), sapply(), mapply(), tapply()
# Example: Calculate mean of each column
df <- data.frame(
  a = 1:5,
  b = 6:10,
  c = 11:15
)
# Using loop (slower)
# seq_len() rather than 1:ncol(df): for a data frame with zero columns 1:0
# would iterate over c(1, 0), whereas seq_len(0) iterates zero times.
means <- numeric(ncol(df))
for (i in seq_len(ncol(df))) {
  means[i] <- mean(df[[i]])
}
# Using vapply (cleaner, type-safe)
# vapply() visits each column of the data frame and guarantees one number per
# column. apply(df, 2, mean) would first coerce the whole data frame to a
# matrix, which turns every column into character as soon as one column is
# not numeric.
means <- vapply(df, mean, numeric(1))
means## a b c
## 3 8 13
## a b c
## 3 8 13
# Basic function
# Body mass index: weight_kg divided by the square of height_m.
# Both arguments may be vectors; the arithmetic is element-wise.
calculate_bmi <- function(weight_kg, height_m) {
  squared_height <- height_m^2
  weight_kg / squared_height
}
calculate_bmi(70, 1.75)## [1] 22.85714
# Function with default arguments
# Joins `greeting` and `name` with a single space, e.g. "Hello Alice".
# `greeting` falls back to "Hello" when it is not supplied.
greet <- function(name, greeting = "Hello") {
  salutation <- paste(greeting, name, sep = " ")
  return(salutation)
}
greet("Alice")## [1] "Hello Alice"
## [1] "Hi Bob"
# Function with multiple returns
# Summarise `x` with several statistics at once.
# Returns a named list with the elements mean, median, sd, min and max, each
# computed by the function of the same name.
statistics <- function(x) {
  summary_funs <- list(
    mean = mean,
    median = median,
    sd = sd,
    min = min,
    max = max
  )
  # Apply every summary function to the same input; lapply() keeps the names.
  lapply(summary_funs, function(summarise_with) summarise_with(x))
}
stats <- statistics(1:100)
stats$mean## [1] 50.5
## [1] 1 4 9 16 25
# NEW: Pipe-friendly functions with {}
library(dplyr)
# Wrapping the right-hand side of %>% in { } stops the pipe from inserting the
# data as the first argument; inside the braces `.` refers to the piped object
# (here mtcars) and can be used as often as needed.
mtcars %>%
{
data.frame(
mean_mpg = mean(.$mpg),
mean_hp = mean(.$hp)
)
}
## mean_mpg mean_hp
## 1 20.09062 146.6875
# Print debugging
# Doubles `x`, printing the input and the computed value on the way so the
# intermediate state can be followed in the console.
my_function <- function(x) {
  print(paste("Input:", x)) # Debug print
  doubled <- x * 2
  print(paste("Result:", doubled)) # Debug print
  doubled
}
# Use browser() for interactive debugging
# Redefines my_function with a breakpoint: browser() pauses the call at that
# line and opens the interactive debugger, where the function's local
# variables can be inspected.
my_function <- function(x) {
browser() # Execution stops here
result <- x * 2
return(result)
}
# Use debug() to step through function
# debug() flags the function so that the next call is stepped through in the
# debugger; undebug() removes the flag again.
debug(my_function)
my_function(5)
undebug(my_function)
# Use traceback() after error
# ... error occurs ...
# traceback() prints the call stack of the most recent uncaught error.
traceback()
# Use try() and tryCatch() for error handling
# log() of a string is an error; try() catches it, and silent = TRUE suppresses
# the printed error message. The script continues either way.
result <- try(log("not a number"), silent = TRUE)
# On failure try() returns an object of class "try-error" instead of a value.
if (inherits(result, "try-error")) {
print("An error occurred!")
}
# Better error handling with tryCatch()
# Logarithm that reports problems via message() instead of failing.
#   x: value(s) passed to log().
# Returns log(x).
# - If log() signals an error (e.g. a character argument), the error text is
#   reported with message() and NA is returned.
# - If log() signals a warning (e.g. a negative number), the warning text is
#   reported with message() and the value log() computed anyway is returned
#   (NaN for a negative number).
safe_log <- function(x) {
  tryCatch(
    withCallingHandlers(
      log(x),
      warning = function(w) {
        message("Warning: ", conditionMessage(w))
        # Muffle the warning and let log() finish, so its value is returned.
        # Re-running log(x) inside an exiting tryCatch() handler would raise
        # the same warning a second time with nothing left to catch it.
        invokeRestart("muffleWarning")
      }
    ),
    error = function(e) {
      message("Error: ", conditionMessage(e))
      NA
    }
  )
}
safe_log("abc")
safe_log(-5)# Base R
# NOTE(review): the data/ paths in this section are placeholders - nothing in
# the code shown here creates these files.
df_base <- read.csv("data/file.csv")
# Same call with arguments spelled out; header = TRUE and sep = "," are already
# the read.csv() defaults.
df_base <- read.csv("data/file.csv",
header = TRUE,
sep = ",",
stringsAsFactors = FALSE)
# readr (tidyverse - FASTER and better defaults)
library(readr)
# Each assignment below replaces df; the calls are alternatives, not a sequence.
df <- read_csv("data/file.csv") # Better than read.csv()
df <- read_tsv("data/file.txt") # Tab-separated
df <- read_delim("data/file.txt", delim = "|") # Custom delimiter
# Read from URL
# read_csv() is given the URL string in place of a local path.
url <- "https://raw.githubusercontent.com/datasets/covid-19/master/data/countries-aggregated.csv"
df_url <- read_csv(url)
# Read with column specifications
# cols() declares the type of each named column up front instead of letting
# readr guess; the date column is parsed with the given format.
df <- read_csv("data/file.csv",
col_types = cols(
id = col_integer(),
name = col_character(),
date = col_date(format = "%Y-%m-%d"),
value = col_double()
))
# Skip rows
df <- read_csv("data/file.csv", skip = 2)
# Read only first n rows
df <- read_csv("data/file.csv", n_max = 1000)
# readxl package (part of tidyverse)
library(readxl)
# Read first sheet
df <- read_excel("data/file.xlsx")
# Read specific sheet
# `sheet` accepts either the sheet name or its position.
df <- read_excel("data/file.xlsx", sheet = "Sheet2")
df <- read_excel("data/file.xlsx", sheet = 2)
# List all sheets
excel_sheets("data/file.xlsx")
# Read all sheets at once
file_path <- "data/file.xlsx"
all_sheets <- excel_sheets(file_path)
# One read_excel() call per sheet name; lapply() returns an unnamed list, so
# the sheet names are attached afterwards.
data_list <- lapply(all_sheets, function(x) read_excel(file_path, sheet = x))
names(data_list) <- all_sheets
# Read specific range
df <- read_excel("data/file.xlsx", range = "A1:D10")
# Writing Excel files (openxlsx package)
library(openxlsx)
write.xlsx(df, "output/file.xlsx")
# Write multiple sheets
# A named list is passed, one element per sheet name. This call targets the
# same path as the write above.
# NOTE(review): df1 and df2 are not defined in the code shown here.
write.xlsx(list(Sheet1 = df1, Sheet2 = df2), "output/file.xlsx")
# SQLite
library(RSQLite)
con <- dbConnect(SQLite(), "data/database.sqlite")
df <- dbReadTable(con, "table_name")
# SQL query
df <- dbGetQuery(con, "SELECT * FROM table_name WHERE value > 100")
# Close connection
dbDisconnect(con)
# PostgreSQL
# The credentials below are placeholders; in real code read them from the
# environment (e.g. Sys.getenv()) rather than hard-coding them in a script.
library(RPostgreSQL)
con <- dbConnect(PostgreSQL(),
  dbname = "mydb",
  host = "localhost",
  port = 5432,
  user = "username",
  password = "password")
# Close this connection before `con` is reused for MySQL below; overwriting the
# variable would otherwise leave the PostgreSQL connection open with no handle
# left to close it.
dbDisconnect(con)
# MySQL
library(RMySQL)
con <- dbConnect(MySQL(),
  dbname = "mydb",
  host = "localhost",
  user = "username",
  password = "password")
# Generic DBI interface
# The same DBI functions work on whichever backend `con` points to.
library(DBI)
df <- dbReadTable(con, "table_name")
dbWriteTable(con, "new_table", df)
# Release the connection once the work is done.
dbDisconnect(con)
# JSON
library(jsonlite)
# fromJSON() takes the file path here; the second call passes
# simplifyDataFrame = FALSE to turn off simplification into a data frame.
df <- fromJSON("data/file.json")
data_list <- fromJSON("data/file.json", simplifyDataFrame = FALSE)
# XML
library(XML)
doc <- xmlParse("data/file.xml")
df <- xmlToDataFrame(doc)
# SPSS, SAS, Stata (haven package)
library(haven)
df_spss <- read_sav("data/file.sav") # SPSS
df_sas <- read_sas("data/file.sas7bdat") # SAS
df_stata <- read_dta("data/file.dta") # Stata
# RDS (R native format - FAST!)
# An .rds file holds a single object; readRDS() returns it, so the caller
# chooses the variable name.
saveRDS(df, "data/file.rds")
df <- readRDS("data/file.rds")
# RData (multiple objects)
# save() stores several objects under their names; load() restores them under
# those same names.
# NOTE(review): df1, df2 and df3 are not defined in the code shown here.
save(df1, df2, df3, file = "data/workspace.RData")
load("data/workspace.RData")
# feather (fast format for Python/R)
library(feather)
write_feather(df, "data/file.feather")
df <- read_feather("data/file.feather")
# parquet (columnar format)
library(arrow)
write_parquet(df, "data/file.parquet")
df <- read_parquet("data/file.parquet")
# dplyr examples start here. library(dplyr) must be a statement of its own:
# fused onto the end of the previous call it is a parse error.
library(dplyr)
# Sample data
# mtcars ships with base R; from here on df refers to a copy of it.
data("mtcars")
df <- mtcars
# 1. SELECT - Choose columns
df %>% select(mpg, cyl, hp)
## mpg cyl hp
## Mazda RX4 21.0 6 110
## Mazda RX4 Wag 21.0 6 110
## Datsun 710 22.8 4 93
## Hornet 4 Drive 21.4 6 110
## Hornet Sportabout 18.7 8 175
## Valiant 18.1 6 105
## Duster 360 14.3 8 245
## Merc 240D 24.4 4 62
## Merc 230 22.8 4 95
## Merc 280 19.2 6 123
## Merc 280C 17.8 6 123
## Merc 450SE 16.4 8 180
## Merc 450SL 17.3 8 180
## Merc 450SLC 15.2 8 180
## Cadillac Fleetwood 10.4 8 205
## Lincoln Continental 10.4 8 215
## Chrysler Imperial 14.7 8 230
## Fiat 128 32.4 4 66
## Honda Civic 30.4 4 52
## Toyota Corolla 33.9 4 65
## Toyota Corona 21.5 4 97
## Dodge Challenger 15.5 8 150
## AMC Javelin 15.2 8 150
## Camaro Z28 13.3 8 245
## Pontiac Firebird 19.2 8 175
## Fiat X1-9 27.3 4 66
## Porsche 914-2 26.0 4 91
## Lotus Europa 30.4 4 113
## Ford Pantera L 15.8 8 264
## Ferrari Dino 19.7 6 175
## Maserati Bora 15.0 8 335
## Volvo 142E 21.4 4 109
## cyl carb
## Mazda RX4 6 4
## Mazda RX4 Wag 6 4
## Datsun 710 4 1
## Hornet 4 Drive 6 1
## Hornet Sportabout 8 2
## Valiant 6 1
## Duster 360 8 4
## Merc 240D 4 2
## Merc 230 4 2
## Merc 280 6 4
## Merc 280C 6 4
## Merc 450SE 8 3
## Merc 450SL 8 3
## Merc 450SLC 8 3
## Cadillac Fleetwood 8 4
## Lincoln Continental 8 4
## Chrysler Imperial 8 4
## Fiat 128 4 1
## Honda Civic 4 2
## Toyota Corolla 4 1
## Toyota Corona 4 1
## Dodge Challenger 8 2
## AMC Javelin 8 2
## Camaro Z28 8 4
## Pontiac Firebird 8 2
## Fiat X1-9 4 1
## Porsche 914-2 4 2
## Lotus Europa 4 2
## Ford Pantera L 8 4
## Ferrari Dino 6 6
## Maserati Bora 8 8
## Volvo 142E 4 2
## disp hp
## Mazda RX4 160.0 110
## Mazda RX4 Wag 160.0 110
## Datsun 710 108.0 93
## Hornet 4 Drive 258.0 110
## Hornet Sportabout 360.0 175
## Valiant 225.0 105
## Duster 360 360.0 245
## Merc 240D 146.7 62
## Merc 230 140.8 95
## Merc 280 167.6 123
## Merc 280C 167.6 123
## Merc 450SE 275.8 180
## Merc 450SL 275.8 180
## Merc 450SLC 275.8 180
## Cadillac Fleetwood 472.0 205
## Lincoln Continental 460.0 215
## Chrysler Imperial 440.0 230
## Fiat 128 78.7 66
## Honda Civic 75.7 52
## Toyota Corolla 71.1 65
## Toyota Corona 120.1 97
## Dodge Challenger 318.0 150
## AMC Javelin 304.0 150
## Camaro Z28 350.0 245
## Pontiac Firebird 400.0 175
## Fiat X1-9 79.0 66
## Porsche 914-2 120.3 91
## Lotus Europa 95.1 113
## Ford Pantera L 351.0 264
## Ferrari Dino 145.0 175
## Maserati Bora 301.0 335
## Volvo 142E 121.0 109
## drat am gear carb
## Mazda RX4 3.90 1 4 4
## Mazda RX4 Wag 3.90 1 4 4
## Datsun 710 3.85 1 4 1
## Hornet 4 Drive 3.08 0 3 1
## Hornet Sportabout 3.15 0 3 2
## Valiant 2.76 0 3 1
## Duster 360 3.21 0 3 4
## Merc 240D 3.69 0 4 2
## Merc 230 3.92 0 4 2
## Merc 280 3.92 0 4 4
## Merc 280C 3.92 0 4 4
## Merc 450SE 3.07 0 3 3
## Merc 450SL 3.07 0 3 3
## Merc 450SLC 3.07 0 3 3
## Cadillac Fleetwood 2.93 0 3 4
## Lincoln Continental 3.00 0 3 4
## Chrysler Imperial 3.23 0 3 4
## Fiat 128 4.08 1 4 1
## Honda Civic 4.93 1 4 2
## Toyota Corolla 4.22 1 4 1
## Toyota Corona 3.70 0 3 1
## Dodge Challenger 2.76 0 3 2
## AMC Javelin 3.15 0 3 2
## Camaro Z28 3.73 0 3 4
## Pontiac Firebird 3.08 0 3 2
## Fiat X1-9 4.08 1 4 1
## Porsche 914-2 4.43 1 5 2
## Lotus Europa 3.77 1 5 2
## Ford Pantera L 4.22 1 5 4
## Ferrari Dino 3.62 1 5 6
## Maserati Bora 3.54 1 5 8
## Volvo 142E 4.11 1 4 2
## mpg cyl disp hp
## Mazda RX4 21.0 6 160.0 110
## Mazda RX4 Wag 21.0 6 160.0 110
## Datsun 710 22.8 4 108.0 93
## Hornet 4 Drive 21.4 6 258.0 110
## Hornet Sportabout 18.7 8 360.0 175
## Valiant 18.1 6 225.0 105
## Duster 360 14.3 8 360.0 245
## Merc 240D 24.4 4 146.7 62
## Merc 230 22.8 4 140.8 95
## Merc 280 19.2 6 167.6 123
## Merc 280C 17.8 6 167.6 123
## Merc 450SE 16.4 8 275.8 180
## Merc 450SL 17.3 8 275.8 180
## Merc 450SLC 15.2 8 275.8 180
## Cadillac Fleetwood 10.4 8 472.0 205
## Lincoln Continental 10.4 8 460.0 215
## Chrysler Imperial 14.7 8 440.0 230
## Fiat 128 32.4 4 78.7 66
## Honda Civic 30.4 4 75.7 52
## Toyota Corolla 33.9 4 71.1 65
## Toyota Corona 21.5 4 120.1 97
## Dodge Challenger 15.5 8 318.0 150
## AMC Javelin 15.2 8 304.0 150
## Camaro Z28 13.3 8 350.0 245
## Pontiac Firebird 19.2 8 400.0 175
## Fiat X1-9 27.3 4 79.0 66
## Porsche 914-2 26.0 4 120.3 91
## Lotus Europa 30.4 4 95.1 113
## Ford Pantera L 15.8 8 351.0 264
## Ferrari Dino 19.7 6 145.0 175
## Maserati Bora 15.0 8 301.0 335
## Volvo 142E 21.4 4 121.0 109
## disp hp drat wt qsec vs am gear carb
## Mazda RX4 160.0 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 160.0 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 108.0 93 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 258.0 110 3.08 3.215 19.44 1 0 3 1
## Hornet Sportabout 360.0 175 3.15 3.440 17.02 0 0 3 2
## Valiant 225.0 105 2.76 3.460 20.22 1 0 3 1
## Duster 360 360.0 245 3.21 3.570 15.84 0 0 3 4
## Merc 240D 146.7 62 3.69 3.190 20.00 1 0 4 2
## Merc 230 140.8 95 3.92 3.150 22.90 1 0 4 2
## Merc 280 167.6 123 3.92 3.440 18.30 1 0 4 4
## Merc 280C 167.6 123 3.92 3.440 18.90 1 0 4 4
## Merc 450SE 275.8 180 3.07 4.070 17.40 0 0 3 3
## Merc 450SL 275.8 180 3.07 3.730 17.60 0 0 3 3
## Merc 450SLC 275.8 180 3.07 3.780 18.00 0 0 3 3
## Cadillac Fleetwood 472.0 205 2.93 5.250 17.98 0 0 3 4
## Lincoln Continental 460.0 215 3.00 5.424 17.82 0 0 3 4
## Chrysler Imperial 440.0 230 3.23 5.345 17.42 0 0 3 4
## Fiat 128 78.7 66 4.08 2.200 19.47 1 1 4 1
## Honda Civic 75.7 52 4.93 1.615 18.52 1 1 4 2
## Toyota Corolla 71.1 65 4.22 1.835 19.90 1 1 4 1
## Toyota Corona 120.1 97 3.70 2.465 20.01 1 0 3 1
## Dodge Challenger 318.0 150 2.76 3.520 16.87 0 0 3 2
## AMC Javelin 304.0 150 3.15 3.435 17.30 0 0 3 2
## Camaro Z28 350.0 245 3.73 3.840 15.41 0 0 3 4
## Pontiac Firebird 400.0 175 3.08 3.845 17.05 0 0 3 2
## Fiat X1-9 79.0 66 4.08 1.935 18.90 1 1 4 1
## Porsche 914-2 120.3 91 4.43 2.140 16.70 0 1 5 2
## Lotus Europa 95.1 113 3.77 1.513 16.90 1 1 5 2
## Ford Pantera L 351.0 264 4.22 3.170 14.50 0 1 5 4
## Ferrari Dino 145.0 175 3.62 2.770 15.50 0 1 5 6
## Maserati Bora 301.0 335 3.54 3.570 14.60 0 1 5 8
## Volvo 142E 121.0 109 4.11 2.780 18.60 1 1 4 2
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
## Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2
## Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1
## Duster 360 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4
## Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
## Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
## Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4
## Merc 280C 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4
## Merc 450SE 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3
## Merc 450SL 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3
## Merc 450SLC 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3
## Cadillac Fleetwood 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4
## Lincoln Continental 10.4 8 460.0 215 3.00 5.424 17.82 0 0 3 4
## Chrysler Imperial 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4
## Fiat 128 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1
## Honda Civic 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2
## Toyota Corolla 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1
## Toyota Corona 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1
## Dodge Challenger 15.5 8 318.0 150 2.76 3.520 16.87 0 0 3 2
## AMC Javelin 15.2 8 304.0 150 3.15 3.435 17.30 0 0 3 2
## Camaro Z28 13.3 8 350.0 245 3.73 3.840 15.41 0 0 3 4
## Pontiac Firebird 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2
## Fiat X1-9 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1
## Porsche 914-2 26.0 4 120.3 91 4.43 2.140 16.70 0 1 5 2
## Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2
## Ford Pantera L 15.8 8 351.0 264 4.22 3.170 14.50 0 1 5 4
## Ferrari Dino 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6
## Maserati Bora 15.0 8 301.0 335 3.54 3.570 14.60 0 1 5 8
## Volvo 142E 21.4 4 121.0 109 4.11 2.780 18.60 1 1 4 2
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
## Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2
## Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1
## Duster 360 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4
## Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
## Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
## Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4
## Merc 280C 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4
## Merc 450SE 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3
## Merc 450SL 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3
## Merc 450SLC 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3
## Cadillac Fleetwood 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4
## Lincoln Continental 10.4 8 460.0 215 3.00 5.424 17.82 0 0 3 4
## Chrysler Imperial 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4
## Fiat 128 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1
## Honda Civic 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2
## Toyota Corolla 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1
## Toyota Corona 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1
## Dodge Challenger 15.5 8 318.0 150 2.76 3.520 16.87 0 0 3 2
## AMC Javelin 15.2 8 304.0 150 3.15 3.435 17.30 0 0 3 2
## Camaro Z28 13.3 8 350.0 245 3.73 3.840 15.41 0 0 3 4
## Pontiac Firebird 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2
## Fiat X1-9 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1
## Porsche 914-2 26.0 4 120.3 91 4.43 2.140 16.70 0 1 5 2
## Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2
## Ford Pantera L 15.8 8 351.0 264 4.22 3.170 14.50 0 1 5 4
## Ferrari Dino 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6
## Maserati Bora 15.0 8 301.0 335 3.54 3.570 14.60 0 1 5 8
## Volvo 142E 21.4 4 121.0 109 4.11 2.780 18.60 1 1 4 2
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
## Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
## Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
## Fiat 128 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1
## Honda Civic 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2
## Toyota Corolla 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1
## Toyota Corona 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1
## Fiat X1-9 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1
## Porsche 914-2 26.0 4 120.3 91 4.43 2.140 16.70 0 1 5 2
## Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2
## Volvo 142E 21.4 4 121.0 109 4.11 2.780 18.60 1 1 4 2
## mpg cyl disp hp drat wt qsec vs am gear carb
## Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
## Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
## Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
## Fiat 128 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1
## Honda Civic 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2
## Toyota Corolla 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1
## Toyota Corona 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1
## Fiat X1-9 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1
## Porsche 914-2 26.0 4 120.3 91 4.43 2.140 16.70 0 1 5 2
## Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2
## Volvo 142E 21.4 4 121.0 109 4.11 2.780 18.60 1 1 4 2
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
## Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
## Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
## Fiat 128 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1
## Honda Civic 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2
## Toyota Corolla 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1
## Toyota Corona 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1
## Fiat X1-9 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1
## Porsche 914-2 26.0 4 120.3 91 4.43 2.140 16.70 0 1 5 2
## Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2
## Volvo 142E 21.4 4 121.0 109 4.11 2.780 18.60 1 1 4 2
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
## Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1
## Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
## Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
## Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4
## Merc 280C 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4
## Fiat 128 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1
## Honda Civic 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2
## Toyota Corolla 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1
## Toyota Corona 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1
## Fiat X1-9 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1
## Porsche 914-2 26.0 4 120.3 91 4.43 2.140 16.70 0 1 5 2
## Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2
## Ferrari Dino 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6
## Volvo 142E 21.4 4 121.0 109 4.11 2.780 18.60 1 1 4 2
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
## Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2
## Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1
## Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
## Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
## Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4
## Merc 280C 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4
## Merc 450SE 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3
## Merc 450SL 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3
## Merc 450SLC 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3
## Toyota Corona 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1
## Dodge Challenger 15.5 8 318.0 150 2.76 3.520 16.87 0 0 3 2
## AMC Javelin 15.2 8 304.0 150 3.15 3.435 17.30 0 0 3 2
## Pontiac Firebird 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2
## Ford Pantera L 15.8 8 351.0 264 4.22 3.170 14.50 0 1 5 4
## Ferrari Dino 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6
## Maserati Bora 15.0 8 301.0 335 3.54 3.570 14.60 0 1 5 8
## Volvo 142E 21.4 4 121.0 109 4.11 2.780 18.60 1 1 4 2
# 3. MUTATE - Create/modify columns
# mutate() returns df with two extra columns; df itself is not modified because
# the result is not assigned.
df %>%
mutate(
mpg_per_cyl = mpg / cyl,
# Strict comparison: a car with exactly 150 hp is labelled "Low".
hp_category = ifelse(hp > 150, "High", "Low")
)
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
## Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2
## Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1
## Duster 360 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4
## Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
## Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
## Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4
## Merc 280C 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4
## Merc 450SE 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3
## Merc 450SL 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3
## Merc 450SLC 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3
## Cadillac Fleetwood 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4
## Lincoln Continental 10.4 8 460.0 215 3.00 5.424 17.82 0 0 3 4
## Chrysler Imperial 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4
## Fiat 128 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1
## Honda Civic 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2
## Toyota Corolla 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1
## Toyota Corona 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1
## Dodge Challenger 15.5 8 318.0 150 2.76 3.520 16.87 0 0 3 2
## AMC Javelin 15.2 8 304.0 150 3.15 3.435 17.30 0 0 3 2
## Camaro Z28 13.3 8 350.0 245 3.73 3.840 15.41 0 0 3 4
## Pontiac Firebird 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2
## Fiat X1-9 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1
## Porsche 914-2 26.0 4 120.3 91 4.43 2.140 16.70 0 1 5 2
## Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2
## Ford Pantera L 15.8 8 351.0 264 4.22 3.170 14.50 0 1 5 4
## Ferrari Dino 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6
## Maserati Bora 15.0 8 301.0 335 3.54 3.570 14.60 0 1 5 8
## Volvo 142E 21.4 4 121.0 109 4.11 2.780 18.60 1 1 4 2
## mpg_per_cyl hp_category
## Mazda RX4 3.500000 Low
## Mazda RX4 Wag 3.500000 Low
## Datsun 710 5.700000 Low
## Hornet 4 Drive 3.566667 Low
## Hornet Sportabout 2.337500 High
## Valiant 3.016667 Low
## Duster 360 1.787500 High
## Merc 240D 6.100000 Low
## Merc 230 5.700000 Low
## Merc 280 3.200000 Low
## Merc 280C 2.966667 Low
## Merc 450SE 2.050000 High
## Merc 450SL 2.162500 High
## Merc 450SLC 1.900000 High
## Cadillac Fleetwood 1.300000 High
## Lincoln Continental 1.300000 High
## Chrysler Imperial 1.837500 High
## Fiat 128 8.100000 Low
## Honda Civic 7.600000 Low
## Toyota Corolla 8.475000 Low
## Toyota Corona 5.375000 Low
## Dodge Challenger 1.937500 Low
## AMC Javelin 1.900000 Low
## Camaro Z28 1.662500 High
## Pontiac Firebird 2.400000 High
## Fiat X1-9 6.825000 Low
## Porsche 914-2 6.500000 Low
## Lotus Europa 7.600000 Low
## Ford Pantera L 1.975000 High
## Ferrari Dino 3.283333 High
## Maserati Bora 1.875000 High
## Volvo 142E 5.350000 Low
## mpg cyl disp hp drat wt qsec vs am gear carb
## Cadillac Fleetwood 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4
## Lincoln Continental 10.4 8 460.0 215 3.00 5.424 17.82 0 0 3 4
## Camaro Z28 13.3 8 350.0 245 3.73 3.840 15.41 0 0 3 4
## Duster 360 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4
## Chrysler Imperial 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4
## Maserati Bora 15.0 8 301.0 335 3.54 3.570 14.60 0 1 5 8
## Merc 450SLC 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3
## AMC Javelin 15.2 8 304.0 150 3.15 3.435 17.30 0 0 3 2
## Dodge Challenger 15.5 8 318.0 150 2.76 3.520 16.87 0 0 3 2
## Ford Pantera L 15.8 8 351.0 264 4.22 3.170 14.50 0 1 5 4
## Merc 450SE 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3
## Merc 450SL 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3
## Merc 280C 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4
## Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1
## Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2
## Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4
## Pontiac Firebird 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2
## Ferrari Dino 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6
## Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
## Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
## Volvo 142E 21.4 4 121.0 109 4.11 2.780 18.60 1 1 4 2
## Toyota Corona 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1
## Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
## Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
## Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
## Porsche 914-2 26.0 4 120.3 91 4.43 2.140 16.70 0 1 5 2
## Fiat X1-9 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1
## Honda Civic 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2
## Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2
## Fiat 128 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1
## Toyota Corolla 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1
## mpg cyl disp hp drat wt qsec vs am gear carb
## Toyota Corolla 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1
## Fiat 128 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1
## Honda Civic 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2
## Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2
## Fiat X1-9 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1
## Porsche 914-2 26.0 4 120.3 91 4.43 2.140 16.70 0 1 5 2
## Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
## Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
## Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
## Toyota Corona 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1
## Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
## Volvo 142E 21.4 4 121.0 109 4.11 2.780 18.60 1 1 4 2
## Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
## Ferrari Dino 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6
## Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4
## Pontiac Firebird 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2
## Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2
## Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1
## Merc 280C 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4
## Merc 450SL 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3
## Merc 450SE 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3
## Ford Pantera L 15.8 8 351.0 264 4.22 3.170 14.50 0 1 5 4
## Dodge Challenger 15.5 8 318.0 150 2.76 3.520 16.87 0 0 3 2
## Merc 450SLC 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3
## AMC Javelin 15.2 8 304.0 150 3.15 3.435 17.30 0 0 3 2
## Maserati Bora 15.0 8 301.0 335 3.54 3.570 14.60 0 1 5 8
## Chrysler Imperial 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4
## Duster 360 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4
## Camaro Z28 13.3 8 350.0 245 3.73 3.840 15.41 0 0 3 4
## Cadillac Fleetwood 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4
## Lincoln Continental 10.4 8 460.0 215 3.00 5.424 17.82 0 0 3 4
## mpg cyl disp hp drat wt qsec vs am gear carb
## Toyota Corolla 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1
## Fiat 128 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1
## Honda Civic 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2
## Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2
## Fiat X1-9 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1
## Porsche 914-2 26.0 4 120.3 91 4.43 2.140 16.70 0 1 5 2
## Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
## Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
## Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
## Toyota Corona 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1
## Volvo 142E 21.4 4 121.0 109 4.11 2.780 18.60 1 1 4 2
## Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
## Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
## Ferrari Dino 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6
## Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4
## Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1
## Merc 280C 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4
## Pontiac Firebird 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2
## Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2
## Merc 450SL 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3
## Merc 450SE 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3
## Ford Pantera L 15.8 8 351.0 264 4.22 3.170 14.50 0 1 5 4
## Dodge Challenger 15.5 8 318.0 150 2.76 3.520 16.87 0 0 3 2
## Merc 450SLC 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3
## AMC Javelin 15.2 8 304.0 150 3.15 3.435 17.30 0 0 3 2
## Maserati Bora 15.0 8 301.0 335 3.54 3.570 14.60 0 1 5 8
## Chrysler Imperial 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4
## Duster 360 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4
## Camaro Z28 13.3 8 350.0 245 3.73 3.840 15.41 0 0 3 4
## Cadillac Fleetwood 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4
## Lincoln Continental 10.4 8 460.0 215 3.00 5.424 17.82 0 0 3 4
# 5. SUMMARISE - collapse the whole table into one row of statistics
summarise(
  df,
  mean_mpg = mean(mpg),     # average fuel economy
  median_mpg = median(mpg), # middle value
  sd_mpg = sd(mpg),         # spread
  n = n()                   # number of rows summarised
)
## mean_mpg median_mpg sd_mpg n
## 1 20.09062 19.2 6.026948 32
# 6. GROUP_BY - compute the same statistics once per cylinder count
summarise(
  group_by(df, cyl),
  mean_mpg = mean(mpg),
  mean_hp = mean(hp),
  count = n() # rows in each cyl group
)
## # A tibble: 3 × 4
## cyl mean_mpg mean_hp count
## <dbl> <dbl> <dbl> <int>
## 1 4 26.7 82.6 11
## 2 6 19.7 122. 7
## 3 8 15.1 209. 14
## cyl
## Mazda RX4 6
## Datsun 710 4
## Hornet Sportabout 8
## cyl gear
## Mazda RX4 6 4
## Datsun 710 4 4
## Hornet 4 Drive 6 3
## Hornet Sportabout 8 3
## Toyota Corona 4 3
## Porsche 914-2 4 5
## Ford Pantera L 8 5
## Ferrari Dino 6 5
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
## Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
## Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
## mpg cyl disp hp drat wt qsec vs am gear carb
## Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.9 1 1 5 2
## Ford Pantera L 15.8 8 351.0 264 4.22 3.170 14.5 0 1 5 4
## Ferrari Dino 19.7 6 145.0 175 3.62 2.770 15.5 0 1 5 6
## Maserati Bora 15.0 8 301.0 335 3.54 3.570 14.6 0 1 5 8
## Volvo 142E 21.4 4 121.0 109 4.11 2.780 18.6 1 1 4 2
## mpg cyl disp hp drat wt qsec vs am gear carb
## Toyota Corolla 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1
## Fiat 128 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1
## Honda Civic 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2
## Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2
## Fiat X1-9 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1
## mpg cyl disp hp drat wt qsec vs am gear carb
## Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2
## Pontiac Firebird 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2
## Fiat 128 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1
## Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
## Merc 450SLC 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3
## miles_per_gallon cyl disp hp drat wt qsec vs am gear
## Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4
## Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4
## Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4
## Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3
## Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3
## Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3
## Duster 360 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3
## Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4
## Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4
## Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4
## Merc 280C 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4
## Merc 450SE 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3
## Merc 450SL 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3
## Merc 450SLC 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3
## Cadillac Fleetwood 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3
## Lincoln Continental 10.4 8 460.0 215 3.00 5.424 17.82 0 0 3
## Chrysler Imperial 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3
## Fiat 128 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4
## Honda Civic 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4
## Toyota Corolla 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4
## Toyota Corona 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3
## Dodge Challenger 15.5 8 318.0 150 2.76 3.520 16.87 0 0 3
## AMC Javelin 15.2 8 304.0 150 3.15 3.435 17.30 0 0 3
## Camaro Z28 13.3 8 350.0 245 3.73 3.840 15.41 0 0 3
## Pontiac Firebird 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3
## Fiat X1-9 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4
## Porsche 914-2 26.0 4 120.3 91 4.43 2.140 16.70 0 1 5
## Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5
## Ford Pantera L 15.8 8 351.0 264 4.22 3.170 14.50 0 1 5
## Ferrari Dino 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5
## Maserati Bora 15.0 8 301.0 335 3.54 3.570 14.60 0 1 5
## Volvo 142E 21.4 4 121.0 109 4.11 2.780 18.60 1 1 4
## carb
## Mazda RX4 4
## Mazda RX4 Wag 4
## Datsun 710 1
## Hornet 4 Drive 1
## Hornet Sportabout 2
## Valiant 1
## Duster 360 4
## Merc 240D 2
## Merc 230 2
## Merc 280 4
## Merc 280C 4
## Merc 450SE 3
## Merc 450SL 3
## Merc 450SLC 3
## Cadillac Fleetwood 4
## Lincoln Continental 4
## Chrysler Imperial 4
## Fiat 128 1
## Honda Civic 2
## Toyota Corolla 1
## Toyota Corona 1
## Dodge Challenger 2
## AMC Javelin 2
## Camaro Z28 4
## Pontiac Firebird 2
## Fiat X1-9 1
## Porsche 914-2 2
## Lotus Europa 2
## Ford Pantera L 4
## Ferrari Dino 6
## Maserati Bora 8
## Volvo 142E 2
## hp mpg cyl disp drat wt qsec vs am gear carb
## Mazda RX4 110 21.0 6 160.0 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 110 21.0 6 160.0 3.90 2.875 17.02 0 1 4 4
## Datsun 710 93 22.8 4 108.0 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 110 21.4 6 258.0 3.08 3.215 19.44 1 0 3 1
## Hornet Sportabout 175 18.7 8 360.0 3.15 3.440 17.02 0 0 3 2
## Valiant 105 18.1 6 225.0 2.76 3.460 20.22 1 0 3 1
## Duster 360 245 14.3 8 360.0 3.21 3.570 15.84 0 0 3 4
## Merc 240D 62 24.4 4 146.7 3.69 3.190 20.00 1 0 4 2
## Merc 230 95 22.8 4 140.8 3.92 3.150 22.90 1 0 4 2
## Merc 280 123 19.2 6 167.6 3.92 3.440 18.30 1 0 4 4
## Merc 280C 123 17.8 6 167.6 3.92 3.440 18.90 1 0 4 4
## Merc 450SE 180 16.4 8 275.8 3.07 4.070 17.40 0 0 3 3
## Merc 450SL 180 17.3 8 275.8 3.07 3.730 17.60 0 0 3 3
## Merc 450SLC 180 15.2 8 275.8 3.07 3.780 18.00 0 0 3 3
## Cadillac Fleetwood 205 10.4 8 472.0 2.93 5.250 17.98 0 0 3 4
## Lincoln Continental 215 10.4 8 460.0 3.00 5.424 17.82 0 0 3 4
## Chrysler Imperial 230 14.7 8 440.0 3.23 5.345 17.42 0 0 3 4
## Fiat 128 66 32.4 4 78.7 4.08 2.200 19.47 1 1 4 1
## Honda Civic 52 30.4 4 75.7 4.93 1.615 18.52 1 1 4 2
## Toyota Corolla 65 33.9 4 71.1 4.22 1.835 19.90 1 1 4 1
## Toyota Corona 97 21.5 4 120.1 3.70 2.465 20.01 1 0 3 1
## Dodge Challenger 150 15.5 8 318.0 2.76 3.520 16.87 0 0 3 2
## AMC Javelin 150 15.2 8 304.0 3.15 3.435 17.30 0 0 3 2
## Camaro Z28 245 13.3 8 350.0 3.73 3.840 15.41 0 0 3 4
## Pontiac Firebird 175 19.2 8 400.0 3.08 3.845 17.05 0 0 3 2
## Fiat X1-9 66 27.3 4 79.0 4.08 1.935 18.90 1 1 4 1
## Porsche 914-2 91 26.0 4 120.3 4.43 2.140 16.70 0 1 5 2
## Lotus Europa 113 30.4 4 95.1 3.77 1.513 16.90 1 1 5 2
## Ford Pantera L 264 15.8 8 351.0 4.22 3.170 14.50 0 1 5 4
## Ferrari Dino 175 19.7 6 145.0 3.62 2.770 15.50 0 1 5 6
## Maserati Bora 335 15.0 8 301.0 3.54 3.570 14.60 0 1 5 8
## Volvo 142E 109 21.4 4 121.0 4.11 2.780 18.60 1 1 4 2
## mpg disp hp drat wt qsec vs am gear carb cyl
## Mazda RX4 21.0 160.0 110 3.90 2.620 16.46 0 1 4 4 6
## Mazda RX4 Wag 21.0 160.0 110 3.90 2.875 17.02 0 1 4 4 6
## Datsun 710 22.8 108.0 93 3.85 2.320 18.61 1 1 4 1 4
## Hornet 4 Drive 21.4 258.0 110 3.08 3.215 19.44 1 0 3 1 6
## Hornet Sportabout 18.7 360.0 175 3.15 3.440 17.02 0 0 3 2 8
## Valiant 18.1 225.0 105 2.76 3.460 20.22 1 0 3 1 6
## Duster 360 14.3 360.0 245 3.21 3.570 15.84 0 0 3 4 8
## Merc 240D 24.4 146.7 62 3.69 3.190 20.00 1 0 4 2 4
## Merc 230 22.8 140.8 95 3.92 3.150 22.90 1 0 4 2 4
## Merc 280 19.2 167.6 123 3.92 3.440 18.30 1 0 4 4 6
## Merc 280C 17.8 167.6 123 3.92 3.440 18.90 1 0 4 4 6
## Merc 450SE 16.4 275.8 180 3.07 4.070 17.40 0 0 3 3 8
## Merc 450SL 17.3 275.8 180 3.07 3.730 17.60 0 0 3 3 8
## Merc 450SLC 15.2 275.8 180 3.07 3.780 18.00 0 0 3 3 8
## Cadillac Fleetwood 10.4 472.0 205 2.93 5.250 17.98 0 0 3 4 8
## Lincoln Continental 10.4 460.0 215 3.00 5.424 17.82 0 0 3 4 8
## Chrysler Imperial 14.7 440.0 230 3.23 5.345 17.42 0 0 3 4 8
## Fiat 128 32.4 78.7 66 4.08 2.200 19.47 1 1 4 1 4
## Honda Civic 30.4 75.7 52 4.93 1.615 18.52 1 1 4 2 4
## Toyota Corolla 33.9 71.1 65 4.22 1.835 19.90 1 1 4 1 4
## Toyota Corona 21.5 120.1 97 3.70 2.465 20.01 1 0 3 1 4
## Dodge Challenger 15.5 318.0 150 2.76 3.520 16.87 0 0 3 2 8
## AMC Javelin 15.2 304.0 150 3.15 3.435 17.30 0 0 3 2 8
## Camaro Z28 13.3 350.0 245 3.73 3.840 15.41 0 0 3 4 8
## Pontiac Firebird 19.2 400.0 175 3.08 3.845 17.05 0 0 3 2 8
## Fiat X1-9 27.3 79.0 66 4.08 1.935 18.90 1 1 4 1 4
## Porsche 914-2 26.0 120.3 91 4.43 2.140 16.70 0 1 5 2 4
## Lotus Europa 30.4 95.1 113 3.77 1.513 16.90 1 1 5 2 4
## Ford Pantera L 15.8 351.0 264 4.22 3.170 14.50 0 1 5 4 8
## Ferrari Dino 19.7 145.0 175 3.62 2.770 15.50 0 1 5 6 6
## Maserati Bora 15.0 301.0 335 3.54 3.570 14.60 0 1 5 8 8
## Volvo 142E 21.4 121.0 109 4.11 2.780 18.60 1 1 4 2 4
## cyl n
## 1 4 11
## 2 6 7
## 3 8 14
## cyl gear n
## 1 4 3 1
## 2 4 4 8
## 3 4 5 2
## 4 6 3 2
## 5 6 4 4
## 6 6 5 1
## 7 8 3 12
## 8 8 5 2
## mpg cyl disp hp drat wt qsec vs am gear carb row_id
## Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4 1
## Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4 2
## Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1 3
## Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1 4
## Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2 5
## Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1 6
## Duster 360 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4 7
## Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2 8
## Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2 9
## Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4 10
## Merc 280C 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4 11
## Merc 450SE 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3 12
## Merc 450SL 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3 13
## Merc 450SLC 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3 14
## Cadillac Fleetwood 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4 15
## Lincoln Continental 10.4 8 460.0 215 3.00 5.424 17.82 0 0 3 4 16
## Chrysler Imperial 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4 17
## Fiat 128 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1 18
## Honda Civic 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2 19
## Toyota Corolla 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1 20
## Toyota Corona 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1 21
## Dodge Challenger 15.5 8 318.0 150 2.76 3.520 16.87 0 0 3 2 22
## AMC Javelin 15.2 8 304.0 150 3.15 3.435 17.30 0 0 3 2 23
## Camaro Z28 13.3 8 350.0 245 3.73 3.840 15.41 0 0 3 4 24
## Pontiac Firebird 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2 25
## Fiat X1-9 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1 26
## Porsche 914-2 26.0 4 120.3 91 4.43 2.140 16.70 0 1 5 2 27
## Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2 28
## Ford Pantera L 15.8 8 351.0 264 4.22 3.170 14.50 0 1 5 4 29
## Ferrari Dino 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6 30
## Maserati Bora 15.0 8 301.0 335 3.54 3.570 14.60 0 1 5 8 31
## Volvo 142E 21.4 4 121.0 109 4.11 2.780 18.60 1 1 4 2 32
# Cumulative operations
# Sort by mpg first so the running total and ranks follow ascending order.
mutate(
  arrange(df, mpg),
  cumsum_mpg = cumsum(mpg),            # running total of mpg
  rank_mpg = min_rank(mpg),            # ties share the smallest rank
  percent_rank_mpg = percent_rank(mpg) # rank rescaled to [0, 1]
)
## mpg cyl disp hp drat wt qsec vs am gear carb
## Cadillac Fleetwood 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4
## Lincoln Continental 10.4 8 460.0 215 3.00 5.424 17.82 0 0 3 4
## Camaro Z28 13.3 8 350.0 245 3.73 3.840 15.41 0 0 3 4
## Duster 360 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4
## Chrysler Imperial 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4
## Maserati Bora 15.0 8 301.0 335 3.54 3.570 14.60 0 1 5 8
## Merc 450SLC 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3
## AMC Javelin 15.2 8 304.0 150 3.15 3.435 17.30 0 0 3 2
## Dodge Challenger 15.5 8 318.0 150 2.76 3.520 16.87 0 0 3 2
## Ford Pantera L 15.8 8 351.0 264 4.22 3.170 14.50 0 1 5 4
## Merc 450SE 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3
## Merc 450SL 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3
## Merc 280C 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4
## Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1
## Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2
## Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4
## Pontiac Firebird 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2
## Ferrari Dino 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6
## Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
## Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
## Volvo 142E 21.4 4 121.0 109 4.11 2.780 18.60 1 1 4 2
## Toyota Corona 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1
## Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
## Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
## Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
## Porsche 914-2 26.0 4 120.3 91 4.43 2.140 16.70 0 1 5 2
## Fiat X1-9 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1
## Honda Civic 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2
## Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2
## Fiat 128 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1
## Toyota Corolla 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1
## cumsum_mpg rank_mpg percent_rank_mpg
## Cadillac Fleetwood 10.4 1 0.00000000
## Lincoln Continental 20.8 1 0.00000000
## Camaro Z28 34.1 3 0.06451613
## Duster 360 48.4 4 0.09677419
## Chrysler Imperial 63.1 5 0.12903226
## Maserati Bora 78.1 6 0.16129032
## Merc 450SLC 93.3 7 0.19354839
## AMC Javelin 108.5 7 0.19354839
## Dodge Challenger 124.0 9 0.25806452
## Ford Pantera L 139.8 10 0.29032258
## Merc 450SE 156.2 11 0.32258065
## Merc 450SL 173.5 12 0.35483871
## Merc 280C 191.3 13 0.38709677
## Valiant 209.4 14 0.41935484
## Hornet Sportabout 228.1 15 0.45161290
## Merc 280 247.3 16 0.48387097
## Pontiac Firebird 266.5 16 0.48387097
## Ferrari Dino 286.2 18 0.54838710
## Mazda RX4 307.2 19 0.58064516
## Mazda RX4 Wag 328.2 19 0.58064516
## Hornet 4 Drive 349.6 21 0.64516129
## Volvo 142E 371.0 21 0.64516129
## Toyota Corona 392.5 23 0.70967742
## Datsun 710 415.3 24 0.74193548
## Merc 230 438.1 24 0.74193548
## Merc 240D 462.5 26 0.80645161
## Porsche 914-2 488.5 27 0.83870968
## Fiat X1-9 515.8 28 0.87096774
## Honda Civic 546.2 29 0.90322581
## Lotus Europa 576.6 29 0.90322581
## Fiat 128 609.0 31 0.96774194
## Toyota Corolla 642.9 32 1.00000000
# Window functions
# mutate() on grouped data evaluates each expression within its cyl group,
# so rank() and mean() below are per-group, and every row is kept.
mutate(
  group_by(df, cyl),
  mpg_rank_in_group = rank(mpg),
  mpg_vs_group_mean = mpg - mean(mpg)
)
## # A tibble: 32 × 13
## # Groups: cyl [3]
## mpg cyl disp hp drat wt qsec vs am gear carb
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 21 6 160 110 3.9 2.62 16.5 0 1 4 4
## 2 21 6 160 110 3.9 2.88 17.0 0 1 4 4
## 3 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1
## 4 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1
## 5 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2
## 6 18.1 6 225 105 2.76 3.46 20.2 1 0 3 1
## 7 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4
## 8 24.4 4 147. 62 3.69 3.19 20 1 0 4 2
## 9 22.8 4 141. 95 3.92 3.15 22.9 1 0 4 2
## 10 19.2 6 168. 123 3.92 3.44 18.3 1 0 4 4
## # ℹ 22 more rows
## # ℹ 2 more variables: mpg_rank_in_group <dbl>, mpg_vs_group_mean <dbl>
# Multiple summaries
# across() applies every function in the named list to each selected column;
# result columns are named <column>_<function>.
summarise(
  group_by(df, cyl),
  across(
    c(mpg, hp),
    list(mean = mean, sd = sd)
  )
)
## # A tibble: 3 × 5
## cyl mpg_mean mpg_sd hp_mean hp_sd
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 4 26.7 4.51 82.6 20.9
## 2 6 19.7 1.45 122. 24.3
## 3 8 15.1 2.56 209. 51.0
# Conditional mutations
# case_when() checks the conditions top to bottom and uses the first match,
# so the thresholds must be listed from highest to lowest.
mutate(
  df,
  efficiency = case_when(
    mpg > 25 ~ "High",
    mpg > 20 ~ "Medium",
    mpg > 15 ~ "Low",
    TRUE ~ "Very Low" # fallback for everything else
  )
)
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
## Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2
## Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1
## Duster 360 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4
## Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
## Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
## Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4
## Merc 280C 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4
## Merc 450SE 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3
## Merc 450SL 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3
## Merc 450SLC 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3
## Cadillac Fleetwood 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4
## Lincoln Continental 10.4 8 460.0 215 3.00 5.424 17.82 0 0 3 4
## Chrysler Imperial 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4
## Fiat 128 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1
## Honda Civic 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2
## Toyota Corolla 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1
## Toyota Corona 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1
## Dodge Challenger 15.5 8 318.0 150 2.76 3.520 16.87 0 0 3 2
## AMC Javelin 15.2 8 304.0 150 3.15 3.435 17.30 0 0 3 2
## Camaro Z28 13.3 8 350.0 245 3.73 3.840 15.41 0 0 3 4
## Pontiac Firebird 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2
## Fiat X1-9 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1
## Porsche 914-2 26.0 4 120.3 91 4.43 2.140 16.70 0 1 5 2
## Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2
## Ford Pantera L 15.8 8 351.0 264 4.22 3.170 14.50 0 1 5 4
## Ferrari Dino 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6
## Maserati Bora 15.0 8 301.0 335 3.54 3.570 14.60 0 1 5 8
## Volvo 142E 21.4 4 121.0 109 4.11 2.780 18.60 1 1 4 2
## efficiency
## Mazda RX4 Medium
## Mazda RX4 Wag Medium
## Datsun 710 Medium
## Hornet 4 Drive Medium
## Hornet Sportabout Low
## Valiant Low
## Duster 360 Very Low
## Merc 240D Medium
## Merc 230 Medium
## Merc 280 Low
## Merc 280C Low
## Merc 450SE Low
## Merc 450SL Low
## Merc 450SLC Low
## Cadillac Fleetwood Very Low
## Lincoln Continental Very Low
## Chrysler Imperial Very Low
## Fiat 128 High
## Honda Civic High
## Toyota Corolla High
## Toyota Corona Medium
## Dodge Challenger Low
## AMC Javelin Low
## Camaro Z28 Very Low
## Pontiac Firebird Low
## Fiat X1-9 High
## Porsche 914-2 High
## Lotus Europa High
## Ford Pantera L Low
## Ferrari Dino Low
## Maserati Bora Very Low
## Volvo 142E Medium
# Sample data
# Five customers; orders reference them through customer_id.
customers <- data.frame(
  id = seq_len(5),
  name = c("Alice", "Bob", "Charlie", "David", "Eve")
)
# customer_id 6 has no matching customer; customers 4 and 5 have no orders.
orders <- data.frame(
  order_id = seq_len(6),
  customer_id = c(1, 1, 2, 3, 3, 6),
  amount = c(100, 150, 200, 50, 75, 300)
)
# INNER JOIN - Only matching rows
customers %>%
  inner_join(orders, by = c("id" = "customer_id"))
## id name order_id amount
## 1 1 Alice 1 100
## 2 1 Alice 2 150
## 3 2 Bob 3 200
## 4 3 Charlie 4 50
## 5 3 Charlie 5 75
# LEFT JOIN - keep every customer; order columns are NA when there is no order
customers %>%
  left_join(orders, by = c("id" = "customer_id"))
## id name order_id amount
## 1 1 Alice 1 100
## 2 1 Alice 2 150
## 3 2 Bob 3 200
## 4 3 Charlie 4 50
## 5 3 Charlie 5 75
## 6 4 David NA NA
## 7 5 Eve NA NA
# RIGHT JOIN - keep every order; name is NA when the customer is unknown
customers %>%
  right_join(orders, by = c("id" = "customer_id"))
## id name order_id amount
## 1 1 Alice 1 100
## 2 1 Alice 2 150
## 3 2 Bob 3 200
## 4 3 Charlie 4 50
## 5 3 Charlie 5 75
## 6 6 <NA> 6 300
## id name order_id amount
## 1 1 Alice 1 100
## 2 1 Alice 2 150
## 3 2 Bob 3 200
## 4 3 Charlie 4 50
## 5 3 Charlie 5 75
## 6 4 David NA NA
## 7 5 Eve NA NA
## 8 6 <NA> 6 300
## id name
## 1 4 David
## 2 5 Eve
# SEMI JOIN - customers with at least one order; no columns from orders added
customers %>%
  semi_join(orders, by = c("id" = "customer_id"))
## id name
## 1 1 Alice
## 2 2 Bob
## 3 3 Charlie
# Create sample data with missing values
library(tidyr)
# Ten rows; value1 has 2 missing entries and value2 has 3
df <- data.frame(
  id = seq_len(10),
  value1 = c(1, 2, NA, 4, 5, NA, 7:10),
  value2 = c(NA, 2, 3, NA, 5, 6, 7, NA, 9, 10)
)
# Check for missing values: total number of NA cells in the data frame
sum(is.na(df))
## [1] 5
## id value1 value2
## 0 2 3
## [1] FALSE TRUE FALSE FALSE TRUE FALSE TRUE FALSE TRUE TRUE
## id value1 value2
## 2 2 2 2
## 5 5 5 5
## 7 7 7 7
## 9 9 9 9
## 10 10 10 10
## id value1 value2
## 1 2 2 2
## 2 5 5 5
## 3 7 7 7
## 4 9 9 9
## 5 10 10 10
## id value1 value2
## 1 1 1 NA
## 2 2 2 2
## 3 4 4 NA
## 4 5 5 5
## 5 7 7 7
## 6 8 8 NA
## 7 9 9 9
## 8 10 10 10
## id value1 value2
## 1 1 1 NA
## 2 2 2 2
## 3 3 0 3
## 4 4 4 NA
## 5 5 5 5
## 6 6 0 6
## 7 7 7 7
## 8 8 8 NA
## 9 9 9 9
## 10 10 10 10
# Replace NA with mean
# Keep observed values; fill gaps with the mean of the non-missing entries.
mutate(
  df,
  value1 = ifelse(!is.na(value1), value1, mean(value1, na.rm = TRUE))
)
## id value1 value2
## 1 1 1.00 NA
## 2 2 2.00 2
## 3 3 5.75 3
## 4 4 4.00 NA
## 5 5 5.00 5
## 6 6 5.75 6
## 7 7 7.00 7
## 8 8 8.00 NA
## 9 9 9.00 9
## 10 10 10.00 10
## id value1 value2
## 1 1 1 NA
## 2 2 2 2
## 3 3 2 3
## 4 4 4 NA
## 5 5 5 5
## 6 6 5 6
## 7 7 7 7
## 8 8 8 NA
## 9 9 9 9
## 10 10 10 10
# Count missing by group
# Rows are grouped by the parity of id (0 = even, 1 = odd); summing the
# logical is.na() vector counts the missing cells in each group.
summarise(
  group_by(df, id %% 2),
  missing_value1 = sum(is.na(value1)),
  missing_value2 = sum(is.na(value2))
)
## # A tibble: 2 × 3
## `id%%2` missing_value1 missing_value2
## <dbl> <int> <int>
## 1 0 1 2
## 2 1 1 1
# Create data with duplicates
# id 2 appears as two identical rows; id 4 has two identical rows plus one
# row that differs only in value.
df <- data.frame(
  id = c(1, 2, 2, 3, 4, 4, 4),
  name = c("A", "B", "B", "C", "D", "D", "D"),
  value = c(10, 20, 20, 30, 40, 40, 45)
)
# Find duplicates
# duplicated() flags repeats after the first occurrence; scanning from both
# ends flags every member of a duplicated set, including the first.
filter(df, duplicated(df) | duplicated(df, fromLast = TRUE))
## id name value
## 1 2 B 20
## 2 2 B 20
## 3 4 D 40
## 4 4 D 40
## id name value
## 1 1 A 10
## 2 2 B 20
## 3 3 C 30
## 4 4 D 40
## 5 4 D 45
## id name value
## 1 1 A 10
## 2 2 B 20
## 3 3 C 30
## 4 4 D 40
# Keep row with max value per group
# slice_max() keeps every tied row by default, so fully duplicated rows
# (the two identical "B" rows) would both survive. with_ties = FALSE
# guarantees exactly one row per (id, name) group.
df %>%
  group_by(id, name) %>%
  slice_max(value, n = 1, with_ties = FALSE) %>%
  ungroup()
## # A tibble: 4 × 3
## id name value
## <dbl> <chr> <dbl>
## 1 1 A 10
## 2 2 B 20
## 3 3 C 30
## 4 4 D 45
library(stringr)
# Sample messy data: padded text, mixed case, an underscore, digits with a dash
text <- c(
  " Hello World ",
  "UPPER case",
  "under_score",
  "123-456"
)
# Trim whitespace from both ends of every element
str_trim(text, side = "both")
## [1] "Hello World" "UPPER case" "under_score" "123-456"
## [1] " hello world " "upper case" "under_score" "123-456"
## [1] " HELLO WORLD " "UPPER CASE" "UNDER_SCORE" "123-456"
## [1] " Hello World " "Upper Case" "Under_score" "123-456"
## [1] "_ Hello World " "UPPER_case" "under_score" "123-456"
## [1] " Hello World " "UPPER case" "under_score" "XXX-XXX"
## [1] NA NA NA "123"
## [[1]]
## character(0)
##
## [[2]]
## character(0)
##
## [[3]]
## character(0)
##
## [[4]]
## [1] "1" "2" "3" "4" "5" "6"
## [1] FALSE FALSE FALSE TRUE
## [[1]]
## [1] "" "" "Hello" "World" "" ""
##
## [[2]]
## [1] "UPPER" "case"
##
## [[3]]
## [1] "under_score"
##
## [[4]]
## [1] "123-456"
## [1] " Hello World | UPPER case | under_score | 123-456"
## [1] " Hello World | UPPER case | under_score | 123-456"
## [1] "ID_1" "ID_2" "ID_3" "ID_4"
# CSV
# NOTE: both calls target the same path, so the second overwrites the first;
# they are alternatives, not steps to run together.
write.csv(df, "output/file.csv", row.names = FALSE)
write_csv(df, "output/file.csv") # readr version (faster)
# Excel
library(openxlsx)
write.xlsx(df, "output/file.xlsx")
# Multiple sheets
# A named list writes one sheet per element, named after the list names.
# NOTE(review): df1/df2 are not defined in the visible part of this file -
# presumably placeholders for your own data frames.
write.xlsx(list(Sheet1 = df1, Sheet2 = df2), "output/file.xlsx")
# RDS (recommended for R-to-R)
# Stores a single object; read it back with readRDS().
saveRDS(df, "output/file.rds")
# RData (multiple objects)
# NOTE(review): df1/df2/df3 presumably placeholders, as above.
save(df1, df2, df3, file = "output/workspace.RData")
# Tab-separated
write.table(df, "output/file.txt", sep = "\t", row.names = FALSE)
# JSON
library(jsonlite)
write_json(df, "output/file.json")
# Parquet (efficient for large data)
library(arrow)
write_parquet(df, "output/file.parquet")
## 7208 bytes
## 7 Kb
# List all objects and their sizes (bytes), largest first
# vapply() always returns a numeric vector, so this also works on an empty
# workspace, where sapply() would return list() and sort() would fail.
sort(
  vapply(ls(), function(x) as.numeric(object.size(get(x))), numeric(1)),
  decreasing = TRUE
)
## squares mtcars numbers greet
## 8048 7208 4048 4024
## statistics calculate_bmi conversion_examples df
## 2744 1904 1400 1400
## my_list customers orders stats
## 1384 1208 1128 864
## values_df f gender ages
## 776 648 576 440
## means arr now text
## 440 352 344 328
## today mat vec_character categories
## 280 264 248 208
## camelCaseVariable name snake_case_variable x
## 120 120 120 112
## values vec_numeric ca customer_age
## 96 96 80 80
## vec_logical count i is_active
## 64 56 56 56
## mean_age total y
## 56 56 56
# Remove objects
# rm() warns when the name does not exist, so only remove it when present.
# Replace large_object with the name of the object you want to drop.
if (exists("large_object", inherits = FALSE)) {
  rm(large_object)
}
# Clear workspace
# rm(list = ls()) # Use with caution!
# Garbage collection (free up memory)
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 925470 49.5 1801071 96.2 1335235 71.4
## Vcells 1728715 13.2 8388608 64.0 2728732 20.9
## [1] Inf
# Read in chunks using readr
library(readr)
# Define chunk size (rows per chunk)
chunk_size <- 10000
# Read and process in chunks
# Summarise one chunk into per-category partial totals.
#   chunk: data frame holding the rows of the current chunk
#   pos:   position argument passed by readr's chunk callback; unused here
# Returns one row per category with the chunk's sum and row count.
# A per-chunk mean cannot be combined into an overall mean afterwards,
# whereas sums and counts can.
process_chunk <- function(chunk, pos) {
  chunk %>%
    group_by(category) %>%
    summarise(sum_value = sum(value), n = n(), .groups = "drop")
}
# Read file in chunks; DataFrameCallback combines the per-chunk results
# into one data frame
chunk_totals <- read_csv_chunked(
  "data/large_file.csv",
  callback = DataFrameCallback$new(process_chunk),
  chunk_size = chunk_size
)
# Combine the partial totals into one overall mean per category
results <- chunk_totals %>%
  group_by(category) %>%
  summarise(mean_value = sum(sum_value) / sum(n), .groups = "drop")
# Alternative: readr with callback
# Keep only the rows of a chunk whose value exceeds 100.
#   x:   data frame holding the rows of the current chunk
#   pos: position argument passed by readr's chunk callback; unused here
f <- function(x, pos) {
  subset(x, value > 100)
}
large_subset <- read_csv_chunked(
  "data/large_file.csv",
  DataFrameCallback$new(f),
  chunk_size = 10000
)
# The library() call was fused onto the previous closing parenthesis,
# which is a parse error; it belongs on its own line.
library(data.table)
# Create sample data
# One million rows: a sequential id, a random group letter A-E and a
# standard-normal value. The seed makes the draws reproducible.
set.seed(123)
dt <- data.table(
  id = seq_len(1000000),
  group = sample(head(LETTERS, 5), size = 1000000, replace = TRUE),
  value = rnorm(n = 1000000)
)
# Fast subsetting: the filter goes in the first ("i") slot of dt[...].
# This returns a new data.table with the matching rows; dt is not modified.
dt[value > 0]
## id group value
## <int> <char> <num>
## 1: 1 C 0.6795146
## 2: 3 B 0.5764687
## 3: 4 B 1.4719505
## 4: 11 E 1.6074790
## 5: 12 C 1.4891379
## ---
## 501065: 999981 B 1.3670128
## 501066: 999986 D 2.8043210
## 501067: 999989 E 1.4282585
## 501068: 999999 E 1.6899913
## 501069: 1000000 D 2.8570567
## group mean_value
## <char> <num>
## 1: C 0.002219641
## 2: B 0.004007285
## 3: E 0.001370850
## 4: D 0.003229437
## 5: A 0.001607565
## group mean_val sd_val count
## <char> <num> <num> <int>
## 1: C 0.002219641 0.9997533 199757
## 2: B 0.004007285 1.0027697 199665
## 3: E 0.001370850 1.0002375 200292
## 4: D 0.003229437 0.9999474 200212
## 5: A 0.001607565 0.9975856 200074
# Update by reference (no copy!)
# := adds or overwrites a column inside dt itself.
dt[, `:=`(new_column = value * 2)]
# Conditional update: only rows matching the filter are assigned
dt[value > 0, `:=`(category = "positive")]
dt[value <= 0, `:=`(category = "non-positive")]
# Remove column: assigning NULL deletes it
dt[, `:=`(category = NULL)]
# Chaining operations: each [...] works on the result of the previous one
dt[value > 0, .(id, value)][order(-value)]
## id value
## <int> <num>
## 1: 193504 4.988298e+00
## 2: 444817 4.729713e+00
## 3: 99193 4.568838e+00
## 4: 462545 4.498943e+00
## 5: 20296 4.477925e+00
## ---
## 501065: 81872 1.212652e-05
## 501066: 757893 1.163082e-05
## 501067: 353926 1.101934e-05
## 501068: 913215 7.906741e-06
## 501069: 392918 1.790613e-06
library(arrow)
library(dplyr)
# Read parquet file (doesn't load into memory)
dataset <- open_dataset("data/large_file.parquet")
# Query without loading full data
result <- dataset %>%
  filter(value > 100) %>%
  group_by(category) %>%
  summarise(mean_value = mean(value)) %>%
  collect() # Only now is data loaded
# Read multiple parquet files as one dataset
dataset <- open_dataset("data/partitioned_data/")
# Write partitioned parquet
# The grouping columns are used as the partitioning keys.
# NOTE(review): df needs year and month columns here - presumably a
# placeholder for your own data.
df %>%
  group_by(year, month) %>%
  write_dataset("data/partitioned_data/", format = "parquet")
# The library() call was fused onto the previous closing parenthesis,
# which is a parse error; it belongs on its own line.
library(parallel)
# Detect number of cores
num_cores <- detectCores()
print(paste("Available cores:", num_cores))
# Leave one core free, but never request fewer than one worker:
# detectCores() can return NA or 1, and num_cores - 1 would then be unusable.
num_workers <- max(1L, num_cores - 1L, na.rm = TRUE)
# Create cluster
cl <- makeCluster(num_workers)
# Parallel lapply
# finally = guarantees the workers are shut down even if the computation
# fails, so no R processes are left running.
results <- tryCatch(
  parLapply(cl, 1:1000, function(x) {
    # Your computation here
    x^2
  }),
  finally = stopCluster(cl)
)
# Using mclapply (Unix/Mac only)
# mclapply relies on forking; on Windows mc.cores must be 1.
fork_workers <- if (.Platform$OS.type == "unix") num_workers else 1L
results <- mclapply(1:1000, function(x) x^2, mc.cores = fork_workers)
# Example: Parallel data processing
library(doParallel)
# Leave one core free but always register at least one worker
# (num_cores may be NA or 1).
registerDoParallel(cores = max(1L, num_cores - 1L, na.rm = TRUE))
library(foreach)
# Each iteration returns a one-row data frame; .combine = rbind stacks them.
results <- foreach(i = 1:1000, .combine = rbind) %dopar% {
  # Your processing
  data.frame(id = i, result = i^2)
}
# Shut down the workers started implicitly by registerDoParallel()
stopImplicitCluster()
# The library() call was fused onto stopImplicitCluster(), which is a
# parse error; it belongs on its own line.
library(furrr)
library(purrr)
# Setup parallel processing: four background R sessions
plan(multisession, workers = 4)
# NOTE(review): slow_function is not defined in the visible part of this
# file - presumably a placeholder for your own expensive function.
# Regular purrr
result_sequential <- map(1:100, ~ slow_function(.x))
# Parallel purrr: same interface, work is spread across the planned workers
result_parallel <- future_map(1:100, ~ slow_function(.x))
# With progress bar
result_parallel <- future_map(1:100, ~ slow_function(.x), .progress = TRUE)
# The library() call was fused onto the previous closing parenthesis,
# which is a parse error; it belongs on its own line.
library(dbplyr)
library(RSQLite)
# Connect to database
con <- dbConnect(SQLite(), "data/large_database.sqlite")
# Create lazy table reference
db_table <- tbl(con, "large_table")
# All operations are lazy (not executed until needed)
# SQL aggregates always skip NULLs; na.rm = TRUE states that explicitly and
# silences the warning dbplyr emits otherwise.
result <- db_table %>%
  filter(value > 100) %>%
  group_by(category) %>%
  summarise(mean_value = mean(value, na.rm = TRUE))
# View SQL query that will be generated
show_query(result)
# Execute and collect results
final_result <- collect(result)
# Close connection
dbDisconnect(con)
# The library() call was fused onto dbDisconnect(con), which is a parse
# error; it belongs on its own line.
library(ggplot2)
# Basic template
# ggplot(data, aes(x, y, ...)) + geom_*() + ...
# Scatter plot
ggplot(mtcars, aes(x = wt, y = mpg)) +
  geom_point()
# With color: mapping a factor gives one discrete colour per cylinder count
ggplot(mtcars, aes(x = wt, y = mpg, color = factor(cyl))) +
  geom_point(size = 3) +
  labs(
    title = "Fuel Efficiency vs Weight",
    x = "Weight (1000 lbs)",
    y = "Miles Per Gallon",
    color = "Cylinders"
  )
# Line plot
# Line thickness is set with linewidth; using size for lines is deprecated
# since ggplot2 3.4.0.
ggplot(economics, aes(x = date, y = unemploy)) +
  geom_line(color = "blue", linewidth = 1) +
  theme_minimal()
# Bar plot: geom_bar() counts the rows in each x category
ggplot(mtcars, aes(x = factor(cyl))) +
  geom_bar(fill = "steelblue") +
  labs(title = "Count by Cylinder", x = "Cylinders", y = "Count")
# Histogram
ggplot(mtcars, aes(x = mpg)) +
  geom_histogram(bins = 10, fill = "darkgreen", color = "white") +
  theme_classic()
# Box plot with the raw points jittered on top
ggplot(mtcars, aes(x = factor(cyl), y = mpg)) +
  geom_boxplot(fill = "lightblue") +
  geom_jitter(width = 0.2, alpha = 0.3)
# The library() call was fused onto geom_jitter(...), which is a parse
# error; it belongs on its own line.
library(dplyr)
# Multiple geoms: coloured points plus one linear trend line for all data
ggplot(mtcars, aes(wt, mpg)) +
  geom_point(aes(color = factor(cyl)), size = 3) +
  geom_smooth(method = "lm", se = TRUE, color = "black") +
  theme_minimal() +
  labs(title = "MPG vs Weight with Linear Trend")
# Custom themes: start from a built-in theme and override single elements
custom_theme <- theme_minimal() +
  theme(
    plot.title = element_text(size = 16, face = "bold"),
    axis.title = element_text(size = 12),
    legend.position = "bottom"
  )
ggplot(mtcars, aes(wt, mpg, color = factor(cyl))) +
  geom_point(size = 3) +
  custom_theme
# Color palettes: continuous viridis scale for the numeric hp mapping
library(viridis)
ggplot(mtcars, aes(wt, mpg, color = hp)) +
  geom_point(size = 4) +
  scale_color_viridis_c() +
  theme_dark()
# Annotations: positions are given in data coordinates
ggplot(mtcars, aes(wt, mpg)) +
  geom_point() +
  annotate("text", x = 4, y = 30, label = "Annotation", size = 5) +
  annotate(
    "rect",
    xmin = 3, xmax = 4, ymin = 25, ymax = 30,
    alpha = 0.2, fill = "red"
  )
# Coordinates
ggplot(mtcars, aes(factor(cyl), fill = factor(gear))) +
  geom_bar() +
  coord_polar() # Pie-like chart
# Scales: fixed x breaks and a log10-transformed y axis
ggplot(mtcars, aes(wt, mpg)) +
  geom_point() +
  scale_x_continuous(breaks = seq(0, 6, 1)) +
  scale_y_continuous(trans = "log10")
# Ensure the output directory exists
if (!dir.exists("output")) {
  dir.create("output", recursive = TRUE)
}
# Save the last plot (ggsave() defaults to the most recently displayed plot)
ggsave("output/my_plot.png", width = 8, height = 6, dpi = 300)
# Save a specific plot
p <- ggplot(mtcars, aes(x = wt, y = mpg)) + geom_point()
ggsave("output/specific_plot.png", plot = p, width = 10, height = 6)
# Combine plots using patchwork
library(patchwork)
p1 <- ggplot(mtcars, aes(x = wt, y = mpg)) + geom_point()
p2 <- ggplot(mtcars, aes(x = hp, y = mpg)) + geom_point()
p3 <- ggplot(mtcars, aes(x = factor(cyl))) + geom_bar()
# Side by side
p1 + p2
# The library() call was fused onto p2, turning it into the undefined name
# "p2library"; it belongs on its own line.
library(plotly)
# Convert ggplot to plotly: build a static plot, then make it interactive
p <- ggplot(mtcars, aes(wt, mpg, color = factor(cyl))) +
  geom_point(size = 3)
ggplotly(p)
# Native plotly: columns are referenced with one-sided formulas (~column)
mtcars %>%
  plot_ly(
    x = ~wt, y = ~mpg,
    type = "scatter", mode = "markers",
    color = ~factor(cyl), size = ~hp
  )
# 3D scatter
mtcars %>%
  plot_ly(
    x = ~wt, y = ~hp, z = ~mpg,
    type = "scatter3d", mode = "markers",
    color = ~factor(cyl)
  )
# highcharter
# Scatter plot grouped by cylinder count
library(highcharter)
hchart(mtcars, "scatter", hcaes(x = wt, y = mpg, group = cyl))

# echarts4r: charts are built step by step with the pipe
library(echarts4r)
mtcars %>%
  e_charts(wt) %>%
  e_scatter(mpg) %>%
  e_title("MPG vs Weight")

# ggvis (interactive ggplot-like)
library(ggvis)
mtcars %>%
  ggvis(~wt, ~mpg) %>%
  layer_points(fill = ~factor(cyl))
## [1] "2023-01-15"
## [1] "2023-01-15"
## [1] "2023-01-15"
## [1] "2023-01-15 14:30:00 UTC"
## [1] 2023
## [1] 1
## [1] 15
## [1] Sun
## Levels: Sun < Mon < Tue < Wed < Thu < Fri < Sat
## [1] 1
## [1] "2023-01-25"
## [1] "2023-03-15"
## [1] "2024-01-15"
## [1] 2023-01-01 UTC--2023-12-31 UTC
## [1] "432000s (~5 days)"
## [1] "432000s (~5 days)"
## [1] "1209600s (~2 weeks)"
## [1] "5d 0H 0M 0S"
## [1] "5d 0H 0M 0S"
## [1] "14d 0H 0M 0S"
## [1] "2026-02-08 18:01:21 EST"
## [1] "2026-02-08 23:01:21 EST"
## [1] "2023-01-01"
## [1] "2023-02-01"
## [1] "2023-01-01"
library(ggplot2)
library(dygraphs)
library(zoo)

# --------------------------
# Create example ts data
# --------------------------
# Daily random walk covering calendar year 2020.
set.seed(123)
dates <- seq(as.Date("2020-01-01"), as.Date("2020-12-31"), by = "day")
values <- cumsum(rnorm(length(dates)))

# Base R ts object for the first 365 days (2020 is a leap year, so `dates`
# holds 366 entries).
n_obs <- 365
ts_data <- ts(values[seq_len(n_obs)], start = c(2020, 1), frequency = 365)

# Index the zoo series by the real calendar dates. Converting with
# zoo(ts_data) alone would carry over the ts time index (decimal years), so
# the `date` column below would be numeric instead of Date.
zoo_ts <- zoo(as.numeric(ts_data), order.by = dates[seq_len(n_obs)])
ts_df <- data.frame(
  date = index(zoo_ts),
  value = coredata(zoo_ts)
)

# Line plot with a proper date axis
ggplot(ts_df, aes(x = date, y = value)) +
  geom_line() +
  theme_minimal() +
  labs(title = "Time Series Plot")
# Create seasonal time series
# Simulated monthly series, 120 observations starting January 2015:
# random noise + linear trend + a 12-month sine cycle.
monthly_ts <- ts(
  rnorm(120) + seq_len(120) / 10 + sin(2 * pi * seq_len(120) / 12),
  frequency = 12,
  start = c(2015, 1)
)

# Decompose
decomposed <- decompose(monthly_ts)
plot(decomposed)

# STL decomposition (more robust)
stl_result <- stl(monthly_ts, s.window = "periodic")
plot(stl_result)
# Load required libraries
# Vector data operations with sf, using the `world` dataset from spData.
library(sf)
library(dplyr)
library(spData) # example spatial datasets

# --------------------------
# Use built-in example dataset
# --------------------------
sf_data <- world # 'world' dataset from spData
sf_data <- st_as_sf(sf_data) # ensure it's an sf object

# Quick plot
plot(st_geometry(sf_data))

# --------------------------
# Buffer example (in meters if projected)
# --------------------------
# Transform to a projected CRS for meters.
# EPSG:3857 (Web Mercator) has metre units but distorts distances and areas
# increasingly with latitude, so treat the results as illustrative only.
sf_proj <- st_transform(sf_data, crs = 3857)
buffered <- st_buffer(sf_proj, dist = 1000000) # 1,000 km buffer

# Plot buffered geometry
plot(st_geometry(buffered))

# --------------------------
# Intersection example
# --------------------------
# Let's intersect two countries as example
other_sf <- sf_data %>% filter(name_long %in% c("France", "Germany"))
overlap <- st_intersection(other_sf, other_sf) # self-intersection just as demo
plot(st_geometry(overlap), col = "red")

# --------------------------
# Distance matrix
# --------------------------
distances <- st_distance(sf_proj[1:5, ]) # only first 5 for simplicity
print(distances)

# --------------------------
# Centroid
# --------------------------
centroids <- st_centroid(sf_proj)
# add = TRUE draws onto the current plot, so first draw a base map in the
# SAME CRS as the centroids. Without it the points would be added to the
# intersection plot above, which uses the untransformed coordinates of
# `world`, and the EPSG:3857 centroids would fall outside the plot region.
plot(st_geometry(sf_proj))
plot(st_geometry(centroids), col = "blue", pch = 16, add = TRUE)

# --------------------------
# Area (in an equal-area projected CRS!)
# --------------------------
# Web Mercator is not area-preserving, so areas are measured after
# transforming to EPSG:6933 (a global cylindrical equal-area projection).
areas <- st_area(st_transform(sf_data, crs = 6933))
head(areas)

# --------------------------
# Nearest feature
# --------------------------
# Example: for each of the first 5 countries, index of the nearest feature
# among rows 6-10 (the index is relative to that 5-row subset)
nearest <- st_nearest_feature(sf_proj[1:5, ], sf_proj[6:10, ])
print(nearest)
# Load libraries
# Raster data operations with terra, using the example raster from spData.
library(terra)
library(sf)
library(dplyr)

# ---------------------
# Load example raster
# ---------------------
# Get the path for the included example raster file
elev_path <- system.file("raster/elev.tif", package = "spData")

# system.file() returns "" when the file cannot be found
if (!nzchar(elev_path)) {
  stop("Example raster not found – install 'spData' and ensure it is up to date.")
}

# Read raster as terra SpatRaster
r <- rast(elev_path)

# Quick plot
plot(r, main = "Example Elevation Raster")

# ---------------------
# Extract raster values at points
# ---------------------
# Place the two sample points inside the raster's own extent (at 1/4 and 3/4
# of its width and height) and give them the raster's CRS. Hard-coded
# coordinates that lie outside the extent would only ever extract NA.
x_range <- c(terra::xmin(r), terra::xmax(r))
y_range <- c(terra::ymin(r), terra::ymax(r))
points_sf <- st_as_sf(
  data.frame(
    lon = x_range[1] + c(0.25, 0.75) * diff(x_range),
    lat = y_range[1] + c(0.75, 0.25) * diff(y_range)
  ),
  coords = c("lon", "lat"),
  crs = terra::crs(r)
)

# Convert sf to terra vector
points_vect <- vect(points_sf)

# Extract raster values at those points
extracted <- terra::extract(r, points_vect)
print(extracted)

# ---------------------
# Raster calculator
# ---------------------
# Arithmetic is applied cell by cell
r2 <- r * 2 + 100
plot(r2, main = "Raster Calculator")

# ---------------------
# Aggregate (coarser resolution)
# ---------------------
r_coarse <- aggregate(r, fact = 5, fun = mean)
plot(r_coarse, main = "Aggregated Raster")

# ---------------------
# Resample raster
# ---------------------
# Resample `r` onto the grid of `r_coarse`
r_resample <- resample(r, r_coarse, method = "bilinear")
plot(r_resample, main = "Resampled Raster")

# The next section uses shiny. This call was previously fused onto the
# plot() line above, which made that line unparseable.
library(shiny)
# UI: a slider in the sidebar, the histogram in the main panel
ui <- fluidPage(
  titlePanel("My First Shiny App"),
  sidebarLayout(
    sidebarPanel(
      sliderInput("bins", "Number of bins:", min = 5, max = 50, value = 30)
    ),
    mainPanel(plotOutput("distPlot"))
  )
)

# Server: redraws the histogram whenever input$bins changes
server <- function(input, output) {
  output$distPlot <- renderPlot({
    waiting <- faithful$waiting
    # input$bins + 1 equally spaced break points produce input$bins bars
    break_points <- seq(min(waiting), max(waiting), length.out = input$bins + 1)
    hist(
      waiting,
      breaks = break_points,
      col = "darkgray",
      border = "white",
      main = "Histogram of waiting times",
      xlab = "Waiting time to next eruption (in mins)"
    )
  })
}

# Run app
shinyApp(ui = ui, server = server)
# UI inputs
# Common input widgets. Each call returns a UI element to place inside a UI
# definition such as fluidPage(); the first argument is the input id.
sliderInput("slider", "Slider:", min = 0, max = 100, value = 50)
numericInput("number", "Number:", value = 10)
textInput("text", "Text:", value = "Enter text")
selectInput("select", "Select:", choices = c("A", "B", "C"))
checkboxInput("checkbox", "Checkbox", value = TRUE)
dateInput("date", "Date:")
fileInput("file", "Choose file")

# Outputs (placeholders that the server fills via output$<id>)
plotOutput("plot")
tableOutput("table")
textOutput("text")
verbatimTextOutput("code")
uiOutput("ui")

# Reactive programming patterns. This server reads input$cyl and
# input$button, so the matching UI must define inputs with those ids.
server <- function(input, output) {
  # Reactive expression (computed once per change)
  data_filtered <- reactive({
    req(input$cyl) # stop quietly until a cylinder value is available
    mtcars %>% filter(cyl == input$cyl)
  })

  # Use reactive data: both outputs share the single cached result
  output$plot <- renderPlot({
    ggplot(data_filtered(), aes(x = wt, y = mpg)) + geom_point()
  })
  output$table <- renderTable({
    data_filtered()
  })

  # Observe (side effects only)
  observe({
    print(paste("Cylinders selected:", input$cyl))
  })

  # observeEvent (react to specific input)
  observeEvent(input$button, {
    showModal(modalDialog("Button clicked!"))
  })
}

# The next section uses shinydashboard. This call was previously fused onto
# the closing brace of server(), which made that line unparseable.
library(shinydashboard)
# Dashboard UI: header, a sidebar menu with two entries, and a body whose
# tabItem()s are linked to the menu entries through matching tabName values.
ui <- dashboardPage(
  dashboardHeader(title = "My Dashboard"),
  dashboardSidebar(
    sidebarMenu(
      menuItem("Dashboard", tabName = "dashboard", icon = icon("dashboard")),
      menuItem("Data", tabName = "data", icon = icon("table"))
    )
  ),
  dashboardBody(
    tabItems(
      tabItem(
        tabName = "dashboard",
        fluidRow(
          valueBoxOutput("value1"),
          valueBoxOutput("value2")
        ),
        fluidRow(
          box(plotOutput("plot1"), width = 6),
          box(plotOutput("plot2"), width = 6)
        )
      ),
      tabItem(
        tabName = "data",
        fluidRow(
          box(tableOutput("table"), width = 12)
        )
      )
    )
  )
)

# Server: one renderer per output id declared in the UI. "value2" and
# "plot2" are rendered too, so no box on the dashboard tab is left empty.
server <- function(input, output) {
  output$value1 <- renderValueBox({
    valueBox(nrow(mtcars), "Total Cars", icon = icon("car"))
  })
  output$value2 <- renderValueBox({
    valueBox(round(mean(mtcars$mpg), 1), "Average MPG", icon = icon("gas-pump"))
  })
  output$plot1 <- renderPlot({
    ggplot(mtcars, aes(x = wt, y = mpg)) + geom_point()
  })
  output$plot2 <- renderPlot({
    ggplot(mtcars, aes(x = hp, y = mpg)) + geom_point()
  })
  output$table <- renderTable({
    head(mtcars)
  })
}

shinyApp(ui, server)
# 1. Use isolate() to prevent reactivity
# The snippets below use `input`, `output` and `session`, so they are meant
# to be placed inside a server function, not run at top level.

# isolate(): reading input$bins inside isolate() does not register a
# dependency on it.
output$plot <- renderPlot({
  # Only reacts to input$update, not input$bins
  input$update
  isolate({
    hist(rnorm(100), breaks = input$bins)
  })
})

# 2. Use eventReactive() for better control
# (process_data() stands for your own function; it is not defined here)
data_processed <- eventReactive(input$go_button, {
  # Only runs when button is clicked
  process_data(input$data)
})

# 3. Use req() to prevent errors
output$plot <- renderPlot({
  req(input$file) # Don't run if file not uploaded
  data <- read.csv(input$file$datapath)
  plot(data)
})

# 4. Use validate() for custom error messages
output$plot <- renderPlot({
  validate(
    need(input$data != "", "Please upload data"),
    # >= 10 so the condition matches the message "at least 10 rows"
    need(nrow(input$data) >= 10, "Need at least 10 rows")
  )
  plot(input$data)
})

# 5. Use updateInput() to change inputs from server
# `session` must be available, i.e. the server function has to be declared
# as function(input, output, session).
observeEvent(input$reset, {
  updateSliderInput(session, "bins", value = 30)
  updateTextInput(session, "text", value = "")
})

# 6. Use downloadHandler for file downloads
output$download <- downloadHandler(
  filename = function() {
    paste0("data-", Sys.Date(), ".csv")
  },
  content = function(file) {
    # data() is presumably a reactive defined elsewhere in the server - confirm
    write.csv(data(), file, row.names = FALSE)
  }
)

# 7. Use shinyjs for JavaScript interactions
library(shinyjs)
useShinyjs() # In UI
onclick("button", alert("Clicked!"))
hide("element")
show("element")
# Profile code execution time
# profvis() runs the enclosed expression under the profiler
library(profvis)
profvis({
  # Your code here
  x <- numeric(1000)
  for (i in 1:1000) {
    x[i] <- i^2
  }
})

# Benchmark alternatives: each named expression is run `times` times
library(microbenchmark)
microbenchmark(
  loop = {
    x <- numeric(1000)
    for (i in 1:1000) {
      x[i] <- i^2
    }
  },
  vectorized = {
    x <- (1:1000)^2
  },
  times = 100
)
# Always prefer vectorized operations
# BAD - Loop: fills a pre-allocated vector one element at a time
result <- numeric(1000)
for (i in 1:1000) {
  result[i] <- sqrt(i)
}

# GOOD - Vectorized: one call over the whole sequence
result <- sqrt(1:1000)

# Use apply family
mat <- matrix(1:12, nrow = 3, ncol = 4)
apply(mat, MARGIN = 2, FUN = sum) # Column sums
## [1] 6 15 24 33
## [1] 6 15 24 33
## [1] "Low" "Low" "Low" "Low" "Low" "High" "High" "High" "High" "High"
my_project/
│
├── README.md # Project overview
├── my_project.Rproj # RStudio project file
│
├── data/
│ ├── raw/ # Original, immutable data
│ └── processed/ # Cleaned, processed data
│
├── scripts/
│ ├── 01_data_import.R
│ ├── 02_data_cleaning.R
│ ├── 03_analysis.R
│ └── 04_visualization.R
│
├── output/
│ ├── figures/
│ └── tables/
│
├── reports/
│ └── analysis_report.Rmd
│
└── functions/
└── helper_functions.R
# Use styler package to auto-format code
library(styler)
style_file("script.R") # a single file
style_dir("scripts/") # a directory

# Use lintr to check code quality
library(lintr)
lint("script.R")

# Naming conventions:
# - snake_case for variables and functions
# - UPPERCASE for constants
# - Meaningful names

# Good spacing
x <- 5 + 3 # GOOD
x<-5+3 # BAD

# Comment your code
# Calculate average age of customers
# (customer_data stands for your own data frame; it is not defined here)
mean_age <- mean(customer_data$age)

# Use functions for repeated code
calculate_metrics <- function(data) {
  # Your code
}

| Package | Purpose | Key Functions |
|---|---|---|
| dplyr | Data manipulation | `select()`, `filter()`, `mutate()`, `summarise()`, `group_by()` |
| tidyr | Data reshaping | `pivot_longer()`, `pivot_wider()`, `separate()`, `unite()` |
| data.table | Fast data manipulation | `[i, j, by]`, `:=`, `setkey()` |
| stringr | String manipulation | `str_detect()`, `str_replace()`, `str_extract()` |
| lubridate | Date/time handling | `ymd()`, `floor_date()`, `interval()` |

| Package | Purpose | Key Functions |
|---|---|---|
| ggplot2 | Static plots | `ggplot()`, `geom_*()`, `theme_*()` |
| plotly | Interactive plots | `plot_ly()`, `ggplotly()` |
| leaflet | Interactive maps | `leaflet()`, `addTiles()`, `addMarkers()` |
| gganimate | Animated plots | `transition_*()`, `animate()` |

| Package | Purpose | Key Functions |
|---|---|---|
| caret | Machine learning | `train()`, `predict()`, `confusionMatrix()` |
| randomForest | Random forests | `randomForest()` |
| glmnet | Regularized regression | `glmnet()`, `cv.glmnet()` |
| forecast | Time series forecasting | `auto.arima()`, `ets()`, `forecast()` |

| Package | Purpose | Key Functions |
|---|---|---|
| data.table | Fast operations | All operations |
| arrow | Large files | `read_parquet()`, `open_dataset()` |
| sparklyr | Spark interface | `spark_connect()`, `spark_read_*()` |
| disk.frame | Disk-based operations | `csv_to_disk.frame()` |

# Subscribe to R Weekly newsletter
# https://rweekly.org/
# Follow R developers on Twitter
# @hadleywickham, @JennyBryan, @WeAreRLadies
# Attend conferences
# useR!, rstudio::conf, satRdays
# Join local R user groups
# https://www.meetup.com/topics/r-project-for-statistical-computing/
# Read R Journal
# https://journal.r-project.org/

# Task: Write a flexible summary function
# Create a function that:
# 1. Takes a data frame and column name
# 2. Returns mean, median, sd, min, max, and number of NAs
# 3. Handles non-numeric columns gracefully
# 4. Optionally removes outliers before calculation
# Your solution here

## R version 4.5.1 (2025-06-13 ucrt)
## Platform: x86_64-w64-mingw32/x64
## Running under: Windows Server 2022 x64 (build 20348)
##
## Matrix products: default
## LAPACK version 3.12.1
##
## locale:
## [1] LC_COLLATE=English_United Kingdom.utf8
## [2] LC_CTYPE=English_United Kingdom.utf8
## [3] LC_MONETARY=English_United Kingdom.utf8
## [4] LC_NUMERIC=C
## [5] LC_TIME=English_United Kingdom.utf8
##
## time zone: Europe/London
## tzcode source: internal
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] zoo_1.8-14 dygraphs_1.1.1.6 lubridate_1.9.4 patchwork_1.3.2
## [5] viridis_0.6.5 viridisLite_0.4.2 ggplot2_4.0.2 data.table_1.17.8
## [9] bit64_4.6.0-1 bit_4.6.0 stringr_1.5.2 tidyr_1.3.2
## [13] MASS_7.3-65 dplyr_1.1.4 conflicted_1.2.0
##
## loaded via a namespace (and not attached):
## [1] sass_0.4.10 utf8_1.2.6 generics_0.1.4 stringi_1.8.7
## [5] lattice_0.22-7 digest_0.6.37 magrittr_2.0.4 timechange_0.3.0
## [9] evaluate_1.0.5 grid_4.5.1 RColorBrewer_1.1-3 fastmap_1.2.0
## [13] jsonlite_2.0.0 Matrix_1.7-3 gridExtra_2.3 mgcv_1.9-3
## [17] purrr_1.1.0 scales_1.4.0 textshaping_1.0.4 jquerylib_0.1.4
## [21] cli_3.6.5 rlang_1.1.7 splines_4.5.1 withr_3.0.2
## [25] cachem_1.1.0 yaml_2.3.10 tools_4.5.1 memoise_2.0.1
## [29] vctrs_0.7.1 R6_2.6.1 lifecycle_1.0.4 htmlwidgets_1.6.4
## [33] ragg_1.5.0 pkgconfig_2.0.3 pillar_1.11.1 bslib_0.9.0
## [37] gtable_0.3.6 glue_1.8.0 systemfonts_1.3.1 xfun_0.53
## [41] tibble_3.3.0 tidyselect_1.2.1 rstudioapi_0.17.1 knitr_1.50
## [45] dichromat_2.0-0.1 farver_2.1.2 htmltools_0.5.8.1 nlme_3.1-168
## [49] rmarkdown_2.30 labeling_0.4.3 compiler_4.5.1 S7_0.2.0
# Package versions
# Character matrix with the name and version of every installed package
installed_packages <- installed.packages()[, c("Package", "Version")]
key_packages <- c(
  "dplyr", "ggplot2", "tidyr", "sf", "terra",
  "shiny", "data.table", "lubridate", "forecast"
)
# Show only the rows whose package name is listed in key_packages
installed_packages[
  is.element(installed_packages[, "Package"], key_packages),
]
## Package Version
## data.table "data.table" "1.17.8"
## dplyr "dplyr" "1.1.4"
## forecast "forecast" "8.24.0"
## ggplot2 "ggplot2" "4.0.2"
## lubridate "lubridate" "1.9.4"
## sf "sf" "1.0-21"
## shiny "shiny" "1.11.1"
## terra "terra" "1.8-70"
## tidyr "tidyr" "1.3.2"
# Quick reference: function names grouped by task, each followed by a short
# description. The calls are written without their arguments, so read this
# as a lookup list rather than as a script to run.
# Data Import/Export
read_csv() # Import CSV
write_csv() # Export CSV
readRDS() # Import RDS
saveRDS() # Export RDS
# Data Manipulation
select() # Select columns
filter() # Filter rows
mutate() # Create/modify columns
summarise() # Aggregate
group_by() # Group data
arrange() # Sort rows
left_join() # Join data
# Data Cleaning
na.omit() # Remove NAs
replace_na() # Replace NAs
distinct() # Remove duplicates
str_trim() # Trim whitespace
# Visualization
ggplot() # Initialize plot
geom_point() # Scatter plot
geom_line() # Line plot
geom_bar() # Bar plot
facet_wrap() # Faceting
ggsave() # Save plot
# Statistics
mean() # Mean
median() # Median
sd() # Standard deviation
cor() # Correlation
lm() # Linear model
summary() # Summary statistics
# Time Series
ts() # Create time series
decompose() # Decompose
lag() # Lag values
diff() # Difference
# Geospatial
st_read() # Read spatial data
st_transform() # Transform CRS
st_buffer() # Buffer
st_intersection() # Intersection

This material is part of the training program by the National Centre for Research Methods (© NCRM), authored by Dr Somnath Chaudhuri (University of Southampton). Content is under a CC BY‑style permissive license and can be freely used for educational purposes with proper attribution.